path: root/contrib/llvm-project/llvm/lib/Transforms/Scalar
author     Dimitry Andric <dim@FreeBSD.org>    2024-07-27 23:34:35 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2024-10-23 18:26:01 +0000
commit     0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583 (patch)
tree       6cf5ab1f05330c6773b1f3f64799d56a9c7a1faa /contrib/llvm-project/llvm/lib/Transforms/Scalar
parent     6b9f7133aba44189d9625c352bc2c2a59baf18ef (diff)
parent     ac9a064cb179f3425b310fa2847f8764ac970a4d (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar')
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp  23
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp  66
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp  54
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp  102
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  316
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp  103
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp  130
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp  33
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp  200
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp  22
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp  100
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp  81
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp  190
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp  216
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp  186
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp  12
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp  70
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp  89
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp  4
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp  307
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp  66
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp  1679
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp  87
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  775
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp  95
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp  7
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp  342
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp  1
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp  265
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp  29
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp  45
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp  16
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp  293
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp  6
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp  63
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp  800
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp  97
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp  119
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp  73
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp  13
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp  58
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  8
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp  2
-rw-r--r--  contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp  30
67 files changed, 3836 insertions, 3602 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
index 90b544c89226..5f0a9b22c3ee 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -350,7 +350,7 @@ bool AggressiveDeadCodeElimination::isInstrumentsConstant(Instruction &I) {
// TODO -- move this test into llvm::isInstructionTriviallyDead
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (Function *Callee = CI->getCalledFunction())
- if (Callee->getName().equals(getInstrProfValueProfFuncName()))
+ if (Callee->getName() == getInstrProfValueProfFuncName())
if (isa<Constant>(CI->getArgOperand(0)))
return true;
return false;
@@ -544,19 +544,20 @@ ADCEChanged AggressiveDeadCodeElimination::removeDeadInstructions() {
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
for (Instruction &I : llvm::reverse(instructions(F))) {
- // With "RemoveDIs" debug-info stored in DPValue objects, debug-info
- // attached to this instruction, and drop any for scopes that aren't alive,
- // like the rest of this loop does. Extending support to assignment tracking
- // is future work.
- for (DPValue &DPV : make_early_inc_range(I.getDbgValueRange())) {
- // Avoid removing a DPV that is linked to instructions because it holds
+ // With "RemoveDIs" debug-info stored in DbgVariableRecord objects,
+ // debug-info attached to this instruction, and drop any for scopes that
+ // aren't alive, like the rest of this loop does. Extending support to
+ // assignment tracking is future work.
+ for (DbgRecord &DR : make_early_inc_range(I.getDbgRecordRange())) {
+ // Avoid removing a DVR that is linked to instructions because it holds
// information about an existing store.
- if (DPV.isDbgAssign())
- if (!at::getAssignmentInsts(&DPV).empty())
+ if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
+ DVR && DVR->isDbgAssign())
+ if (!at::getAssignmentInsts(DVR).empty())
continue;
- if (AliveScopes.count(DPV.getDebugLoc()->getScope()))
+ if (AliveScopes.count(DR.getDebugLoc()->getScope()))
continue;
- I.dropOneDbgValue(&DPV);
+ I.dropOneDbgRecord(&DR);
}
// Check if the instruction is alive.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
index b182f46cc515..5d9a7bca7efe 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
@@ -33,7 +33,7 @@ static void tryEmitAutoInitRemark(ArrayRef<Instruction *> Instructions,
continue;
Function &F = *I->getParent()->getParent();
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
AutoInitRemark Remark(ORE, REMARK_PASS, DL, TLI);
Remark.visit(I);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
index 1fa2c75b0f42..d96dbca30fdb 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -23,10 +23,13 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
+
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "bdce"
@@ -42,15 +45,17 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
assert(I->getType()->isIntOrIntVectorTy() &&
"Trivializing a non-integer value?");
+ // If all bits of a user are demanded, then we know that nothing below that
+ // in the def-use chain needs to be changed.
+ if (DB.getDemandedBits(I).isAllOnes())
+ return;
+
// Initialize the worklist with eligible direct users.
SmallPtrSet<Instruction *, 16> Visited;
SmallVector<Instruction *, 16> WorkList;
for (User *JU : I->users()) {
- // If all bits of a user are demanded, then we know that nothing below that
- // in the def-use chain needs to be changed.
- auto *J = dyn_cast<Instruction>(JU);
- if (J && J->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(J).isAllOnes()) {
+ auto *J = cast<Instruction>(JU);
+ if (J->getType()->isIntOrIntVectorTy()) {
Visited.insert(J);
WorkList.push_back(J);
}
@@ -70,18 +75,19 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
Instruction *J = WorkList.pop_back_val();
// NSW, NUW, and exact are based on operands that might have changed.
- J->dropPoisonGeneratingFlags();
+ J->dropPoisonGeneratingAnnotations();
- // We do not have to worry about llvm.assume or range metadata:
- // 1. llvm.assume demands its operand, so trivializing can't change it.
- // 2. range metadata only applies to memory accesses which demand all bits.
+ // We do not have to worry about llvm.assume, because it demands its
+ // operand, so trivializing can't change it.
+
+ // If all bits of a user are demanded, then we know that nothing below
+ // that in the def-use chain needs to be changed.
+ if (DB.getDemandedBits(J).isAllOnes())
+ continue;
for (User *KU : J->users()) {
- // If all bits of a user are demanded, then we know that nothing below
- // that in the def-use chain needs to be changed.
- auto *K = dyn_cast<Instruction>(KU);
- if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(K).isAllOnes())
+ auto *K = cast<Instruction>(KU);
+ if (Visited.insert(K).second && K->getType()->isIntOrIntVectorTy())
WorkList.push_back(K);
}
}
@@ -125,6 +131,38 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
}
}
+ // Simplify and, or, xor when their mask does not affect the demanded bits.
+ if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
+ APInt Demanded = DB.getDemandedBits(BO);
+ if (!Demanded.isAllOnes()) {
+ const APInt *Mask;
+ if (match(BO->getOperand(1), m_APInt(Mask))) {
+ bool CanBeSimplified = false;
+ switch (BO->getOpcode()) {
+ case Instruction::Or:
+ case Instruction::Xor:
+ CanBeSimplified = !Demanded.intersects(*Mask);
+ break;
+ case Instruction::And:
+ CanBeSimplified = Demanded.isSubsetOf(*Mask);
+ break;
+ default:
+ // TODO: Handle more cases here.
+ break;
+ }
+
+ if (CanBeSimplified) {
+ clearAssumptionsOfUsers(BO, DB);
+ BO->replaceAllUsesWith(BO->getOperand(0));
+ Worklist.push_back(BO);
+ ++NumSimplified;
+ Changed = true;
+ continue;
+ }
+ }
+ }
+ }
+
for (Use &U : I.operands()) {
// DemandedBits only detects dead integer uses.
if (!U->getType()->isIntOrIntVectorTy())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 47f663fa0cf0..b8571ba07489 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -403,7 +403,7 @@ static void splitCallSite(CallBase &CB,
NewPN->insertBefore(*TailBB, TailBB->begin());
CurrentI->replaceAllUsesWith(NewPN);
}
- CurrentI->dropDbgValues();
+ CurrentI->dropDbgRecords();
CurrentI->eraseFromParent();
// We are done once we handled the first original instruction in TailBB.
if (CurrentI == OriginalBeginInst)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 49f8761a1392..4a6dedc93d30 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -43,6 +43,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -162,27 +163,27 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
void ConstantHoistingPass::collectMatInsertPts(
const RebasedConstantListType &RebasedConstants,
- SmallVectorImpl<Instruction *> &MatInsertPts) const {
+ SmallVectorImpl<BasicBlock::iterator> &MatInsertPts) const {
for (const RebasedConstantInfo &RCI : RebasedConstants)
for (const ConstantUser &U : RCI.Uses)
MatInsertPts.emplace_back(findMatInsertPt(U.Inst, U.OpndIdx));
}
/// Find the constant materialization insertion point.
-Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
- unsigned Idx) const {
+BasicBlock::iterator ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
+ unsigned Idx) const {
// If the operand is a cast instruction, then we have to materialize the
// constant before the cast instruction.
if (Idx != ~0U) {
Value *Opnd = Inst->getOperand(Idx);
if (auto CastInst = dyn_cast<Instruction>(Opnd))
if (CastInst->isCast())
- return CastInst;
+ return CastInst->getIterator();
}
// The simple and common case. This also includes constant expressions.
if (!isa<PHINode>(Inst) && !Inst->isEHPad())
- return Inst;
+ return Inst->getIterator();
// We can't insert directly before a phi node or an eh pad. Insert before
// the terminator of the incoming or dominating block.
@@ -191,7 +192,7 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
if (Idx != ~0U && isa<PHINode>(Inst)) {
InsertionBlock = cast<PHINode>(Inst)->getIncomingBlock(Idx);
if (!InsertionBlock->isEHPad()) {
- return InsertionBlock->getTerminator();
+ return InsertionBlock->getTerminator()->getIterator();
}
} else {
InsertionBlock = Inst->getParent();
@@ -206,7 +207,7 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
IDom = IDom->getIDom();
}
- return IDom->getBlock()->getTerminator();
+ return IDom->getBlock()->getTerminator()->getIterator();
}
/// Given \p BBs as input, find another set of BBs which collectively
@@ -314,26 +315,27 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
}
/// Find an insertion point that dominates all uses.
-SetVector<Instruction *> ConstantHoistingPass::findConstantInsertionPoint(
+SetVector<BasicBlock::iterator>
+ConstantHoistingPass::findConstantInsertionPoint(
const ConstantInfo &ConstInfo,
- const ArrayRef<Instruction *> MatInsertPts) const {
+ const ArrayRef<BasicBlock::iterator> MatInsertPts) const {
assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
// Collect all basic blocks.
SetVector<BasicBlock *> BBs;
- SetVector<Instruction *> InsertPts;
+ SetVector<BasicBlock::iterator> InsertPts;
- for (Instruction *MatInsertPt : MatInsertPts)
+ for (BasicBlock::iterator MatInsertPt : MatInsertPts)
BBs.insert(MatInsertPt->getParent());
if (BBs.count(Entry)) {
- InsertPts.insert(&Entry->front());
+ InsertPts.insert(Entry->begin());
return InsertPts;
}
if (BFI) {
findBestInsertionSet(*DT, *BFI, Entry, BBs);
for (BasicBlock *BB : BBs)
- InsertPts.insert(&*BB->getFirstInsertionPt());
+ InsertPts.insert(BB->getFirstInsertionPt());
return InsertPts;
}
@@ -343,7 +345,7 @@ SetVector<Instruction *> ConstantHoistingPass::findConstantInsertionPoint(
BB2 = BBs.pop_back_val();
BB = DT->findNearestCommonDominator(BB1, BB2);
if (BB == Entry) {
- InsertPts.insert(&Entry->front());
+ InsertPts.insert(Entry->begin());
return InsertPts;
}
BBs.insert(BB);
@@ -363,6 +365,9 @@ SetVector<Instruction *> ConstantHoistingPass::findConstantInsertionPoint(
void ConstantHoistingPass::collectConstantCandidates(
ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt) {
+ if (ConstInt->getType()->isVectorTy())
+ return;
+
InstructionCost Cost;
// Ask the target about the cost of materializing the constant for the given
// instruction and operand index.
@@ -761,11 +766,13 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base, Adj->Offset,
"mat_gep", Adj->MatInsertPt);
// Hide it behind a bitcast.
- Mat = new BitCastInst(Mat, Adj->Ty, "mat_bitcast", Adj->MatInsertPt);
+ Mat = new BitCastInst(Mat, Adj->Ty, "mat_bitcast",
+ Adj->MatInsertPt->getIterator());
} else
// Constant being rebased is a ConstantInt.
- Mat = BinaryOperator::Create(Instruction::Add, Base, Adj->Offset,
- "const_mat", Adj->MatInsertPt);
+ Mat =
+ BinaryOperator::Create(Instruction::Add, Base, Adj->Offset,
+ "const_mat", Adj->MatInsertPt->getIterator());
LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
<< " + " << *Adj->Offset << ") in BB "
@@ -816,7 +823,8 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
// Aside from constant GEPs, only constant cast expressions are collected.
assert(ConstExpr->isCast() && "ConstExpr should be a cast");
- Instruction *ConstExprInst = ConstExpr->getAsInstruction(Adj->MatInsertPt);
+ Instruction *ConstExprInst = ConstExpr->getAsInstruction();
+ ConstExprInst->insertBefore(Adj->MatInsertPt);
ConstExprInst->setOperand(0, Mat);
// Use the same debug location as the instruction we are about to update.
@@ -842,9 +850,9 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =
BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
for (const consthoist::ConstantInfo &ConstInfo : ConstInfoVec) {
- SmallVector<Instruction *, 4> MatInsertPts;
+ SmallVector<BasicBlock::iterator, 4> MatInsertPts;
collectMatInsertPts(ConstInfo.RebasedConstants, MatInsertPts);
- SetVector<Instruction *> IPSet =
+ SetVector<BasicBlock::iterator> IPSet =
findConstantInsertionPoint(ConstInfo, MatInsertPts);
// We can have an empty set if the function contains unreachable blocks.
if (IPSet.empty())
@@ -853,7 +861,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
unsigned UsesNum = 0;
unsigned ReBasesNum = 0;
unsigned NotRebasedNum = 0;
- for (Instruction *IP : IPSet) {
+ for (const BasicBlock::iterator &IP : IPSet) {
// First, collect constants depending on this IP of the base.
UsesNum = 0;
SmallVector<UserAdjustment, 4> ToBeRebased;
@@ -861,7 +869,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
for (auto const &RCI : ConstInfo.RebasedConstants) {
UsesNum += RCI.Uses.size();
for (auto const &U : RCI.Uses) {
- Instruction *MatInsertPt = MatInsertPts[MatCtr++];
+ const BasicBlock::iterator &MatInsertPt = MatInsertPts[MatCtr++];
BasicBlock *OrigMatInsertBB = MatInsertPt->getParent();
// If Base constant is to be inserted in multiple places,
// generate rebase for U using the Base dominating U.
@@ -941,7 +949,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
- this->DL = &Fn.getParent()->getDataLayout();
+ this->DL = &Fn.getDataLayout();
this->Ctx = &Fn.getContext();
this->Entry = &Entry;
this->PSI = PSI;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 7b672e89b67a..c31173879af1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
@@ -231,8 +232,8 @@ struct ConstraintTy {
ConstraintTy(SmallVector<int64_t, 8> Coefficients, bool IsSigned, bool IsEq,
bool IsNe)
- : Coefficients(Coefficients), IsSigned(IsSigned), IsEq(IsEq), IsNe(IsNe) {
- }
+ : Coefficients(std::move(Coefficients)), IsSigned(IsSigned), IsEq(IsEq),
+ IsNe(IsNe) {}
unsigned size() const { return Coefficients.size(); }
@@ -461,7 +462,7 @@ static Decomposition decomposeGEP(GEPOperator &GEP,
// If Op0 is signed non-negative, the GEP is increasing monotonically and
// can be de-composed.
- if (!isKnownNonNegative(Index, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1))
+ if (!isKnownNonNegative(Index, DL))
Preconditions.emplace_back(CmpInst::ICMP_SGE, Index,
ConstantInt::get(Index->getType(), 0));
}
@@ -499,6 +500,8 @@ static Decomposition decompose(Value *V,
if (!Ty->isIntegerTy() || Ty->getIntegerBitWidth() > 64)
return V;
+ bool IsKnownNonNegative = false;
+
// Decompose \p V used with a signed predicate.
if (IsSigned) {
if (auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -507,6 +510,14 @@ static Decomposition decompose(Value *V,
}
Value *Op0;
Value *Op1;
+
+ if (match(V, m_SExt(m_Value(Op0))))
+ V = Op0;
+ else if (match(V, m_NNegZExt(m_Value(Op0)))) {
+ V = Op0;
+ IsKnownNonNegative = true;
+ }
+
if (match(V, m_NSWAdd(m_Value(Op0), m_Value(Op1))))
return MergeResults(Op0, Op1, IsSigned);
@@ -529,7 +540,7 @@ static Decomposition decompose(Value *V,
}
}
- return V;
+ return {V, IsKnownNonNegative};
}
if (auto *CI = dyn_cast<ConstantInt>(V)) {
@@ -539,22 +550,27 @@ static Decomposition decompose(Value *V,
}
Value *Op0;
- bool IsKnownNonNegative = false;
if (match(V, m_ZExt(m_Value(Op0)))) {
IsKnownNonNegative = true;
V = Op0;
}
+ if (match(V, m_SExt(m_Value(Op0)))) {
+ V = Op0;
+ Preconditions.emplace_back(CmpInst::ICMP_SGE, Op0,
+ ConstantInt::get(Op0->getType(), 0));
+ }
+
Value *Op1;
ConstantInt *CI;
if (match(V, m_NUWAdd(m_Value(Op0), m_Value(Op1)))) {
return MergeResults(Op0, Op1, IsSigned);
}
if (match(V, m_NSWAdd(m_Value(Op0), m_Value(Op1)))) {
- if (!isKnownNonNegative(Op0, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1))
+ if (!isKnownNonNegative(Op0, DL))
Preconditions.emplace_back(CmpInst::ICMP_SGE, Op0,
ConstantInt::get(Op0->getType(), 0));
- if (!isKnownNonNegative(Op1, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1))
+ if (!isKnownNonNegative(Op1, DL))
Preconditions.emplace_back(CmpInst::ICMP_SGE, Op1,
ConstantInt::get(Op1->getType(), 0));
@@ -1016,6 +1032,20 @@ void State::addInfoForInductions(BasicBlock &BB) {
WorkList.push_back(FactOrCheck::getConditionFact(
DTN, CmpInst::ICMP_SLT, PN, B,
ConditionTy(CmpInst::ICMP_SLE, StartValue, B)));
+
+ // Try to add condition from header to the exit blocks. When exiting either
+ // with EQ or NE in the header, we know that the induction value must be u<=
+ // B, as other exits may only exit earlier.
+ assert(!StepOffset.isNegative() && "induction must be increasing");
+ assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
+ "unsupported predicate");
+ ConditionTy Precond = {CmpInst::ICMP_ULE, StartValue, B};
+ SmallVector<BasicBlock *> ExitBBs;
+ L->getExitBlocks(ExitBBs);
+ for (BasicBlock *EB : ExitBBs) {
+ WorkList.emplace_back(FactOrCheck::getConditionFact(
+ DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond));
+ }
}
void State::addInfoFor(BasicBlock &BB) {
@@ -1057,6 +1087,8 @@ void State::addInfoFor(BasicBlock &BB) {
}
// Enqueue ssub_with_overflow for simplification.
case Intrinsic::ssub_with_overflow:
+ case Intrinsic::ucmp:
+ case Intrinsic::scmp:
WorkList.push_back(
FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
break;
@@ -1065,6 +1097,9 @@ void State::addInfoFor(BasicBlock &BB) {
case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax:
+ // TODO: handle llvm.abs as well
+ WorkList.push_back(
+ FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
// TODO: Check if it is possible to instead only added the min/max facts
// when simplifying uses of the min/max intrinsics.
if (!isGuaranteedNotToBePoison(&I))
@@ -1395,6 +1430,48 @@ static bool checkAndReplaceCondition(
return false;
}
+static bool checkAndReplaceMinMax(MinMaxIntrinsic *MinMax, ConstraintInfo &Info,
+ SmallVectorImpl<Instruction *> &ToRemove) {
+ auto ReplaceMinMaxWithOperand = [&](MinMaxIntrinsic *MinMax, bool UseLHS) {
+ // TODO: generate reproducer for min/max.
+ MinMax->replaceAllUsesWith(MinMax->getOperand(UseLHS ? 0 : 1));
+ ToRemove.push_back(MinMax);
+ return true;
+ };
+
+ ICmpInst::Predicate Pred =
+ ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ if (auto ImpliedCondition = checkCondition(
+ Pred, MinMax->getOperand(0), MinMax->getOperand(1), MinMax, Info))
+ return ReplaceMinMaxWithOperand(MinMax, *ImpliedCondition);
+ if (auto ImpliedCondition = checkCondition(
+ Pred, MinMax->getOperand(1), MinMax->getOperand(0), MinMax, Info))
+ return ReplaceMinMaxWithOperand(MinMax, !*ImpliedCondition);
+ return false;
+}
+
+static bool checkAndReplaceCmp(CmpIntrinsic *I, ConstraintInfo &Info,
+ SmallVectorImpl<Instruction *> &ToRemove) {
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ if (checkCondition(I->getGTPredicate(), LHS, RHS, I, Info).value_or(false)) {
+ I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
+ ToRemove.push_back(I);
+ return true;
+ }
+ if (checkCondition(I->getLTPredicate(), LHS, RHS, I, Info).value_or(false)) {
+ I->replaceAllUsesWith(ConstantInt::getSigned(I->getType(), -1));
+ ToRemove.push_back(I);
+ return true;
+ }
+ if (checkCondition(ICmpInst::ICMP_EQ, LHS, RHS, I, Info)) {
+ I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0));
+ ToRemove.push_back(I);
+ return true;
+ }
+ return false;
+}
+
static void
removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
Module *ReproducerModule,
@@ -1602,7 +1679,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
SmallVector<Value *> FunctionArgs;
for (Value &Arg : F.args())
FunctionArgs.push_back(&Arg);
- ConstraintInfo Info(F.getParent()->getDataLayout(), FunctionArgs);
+ ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
State S(DT, LI, SE);
std::unique_ptr<Module> ReproducerModule(
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
@@ -1695,6 +1772,10 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
ReproducerCondStack, DFSInStack);
}
Changed |= Simplified;
+ } else if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(Inst)) {
+ Changed |= checkAndReplaceMinMax(MinMax, Info, ToRemove);
+ } else if (auto *CmpIntr = dyn_cast<CmpIntrinsic>(Inst)) {
+ Changed |= checkAndReplaceCmp(CmpIntr, Info, ToRemove);
}
continue;
}
@@ -1730,7 +1811,10 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
if (!CB.isConditionFact()) {
Value *X;
if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
- // TODO: Add CB.Inst >= 0 fact.
+ // If is_int_min_poison is true then we may assume llvm.abs >= 0.
+ if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
+ AddFact(CmpInst::ICMP_SGE, CB.Inst,
+ ConstantInt::get(CB.Inst->getType(), 0));
AddFact(CmpInst::ICMP_SGE, CB.Inst, X);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9235850de92f..95de8eceb6be 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -47,11 +48,6 @@ using namespace llvm;
#define DEBUG_TYPE "correlated-value-propagation"
-static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned(
- "canonicalize-icmp-predicates-to-unsigned", cl::init(true), cl::Hidden,
- cl::desc("Enables canonicalization of signed relational predicates to "
- "unsigned (e.g. sgt => ugt)"));
-
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
STATISTIC(NumSelects, "Number of selects propagated");
@@ -67,6 +63,7 @@ STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr");
STATISTIC(NumAShrsRemoved, "Number of ashr removed");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
+STATISTIC(NumSIToFP, "Number of sitofp converted to uitofp");
STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
STATISTIC(NumAnd, "Number of ands removed");
STATISTIC(NumNW, "Number of no-wrap deductions");
@@ -89,10 +86,13 @@ STATISTIC(NumOverflows, "Number of overflow checks removed");
STATISTIC(NumSaturating,
"Number of saturating arithmetics converted to normal arithmetics");
STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
+STATISTIC(NumCmpIntr, "Number of llvm.[us]cmp intrinsics removed");
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
+STATISTIC(NumSMinMax,
+ "Number of llvm.s{min,max} intrinsics simplified to unsigned");
STATISTIC(NumUDivURemsNarrowedExpanded,
"Number of bound udiv's/urem's expanded");
-STATISTIC(NumZExt, "Number of non-negative deductions");
+STATISTIC(NumNNeg, "Number of zext/uitofp non-negative deductions");
static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
if (Constant *C = LVI->getConstant(V, At))
@@ -109,14 +109,8 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
if (!Op1)
return nullptr;
- LazyValueInfo::Tristate Result = LVI->getPredicateAt(
- C->getPredicate(), Op0, Op1, At, /*UseBlockValue=*/false);
- if (Result == LazyValueInfo::Unknown)
- return nullptr;
-
- return (Result == LazyValueInfo::True)
- ? ConstantInt::getTrue(C->getContext())
- : ConstantInt::getFalse(C->getContext());
+ return LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At,
+ /*UseBlockValue=*/false);
}
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
@@ -243,15 +237,17 @@ static Value *getValueOnEdge(LazyValueInfo *LVI, Value *Incoming,
// The "false" case
if (auto *C = dyn_cast<Constant>(SI->getFalseValue()))
- if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
- LazyValueInfo::False)
+ if (auto *Res = dyn_cast_or_null<ConstantInt>(
+ LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI));
+ Res && Res->isZero())
return SI->getTrueValue();
// The "true" case,
// similar to the select "false" case, but try the select "true" value
if (auto *C = dyn_cast<Constant>(SI->getTrueValue()))
- if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI) ==
- LazyValueInfo::False)
+ if (auto *Res = dyn_cast_or_null<ConstantInt>(
+ LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, From, To, CxtI));
+ Res && Res->isZero())
return SI->getFalseValue();
return nullptr;
@@ -289,12 +285,8 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, DominatorTree *DT,
}
static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
- if (!CanonicalizeICmpPredicatesToUnsigned)
- return false;
-
- // Only for signed relational comparisons of scalar integers.
- if (Cmp->getType()->isVectorTy() ||
- !Cmp->getOperand(0)->getType()->isIntegerTy())
+ // Only for signed relational comparisons of integers.
+ if (!Cmp->getOperand(0)->getType()->isIntOrIntVectorTy())
return false;
if (!Cmp->isSigned())
@@ -324,16 +316,13 @@ static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
Value *Op0 = Cmp->getOperand(0);
Value *Op1 = Cmp->getOperand(1);
- LazyValueInfo::Tristate Result =
- LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp,
- /*UseBlockValue=*/true);
- if (Result == LazyValueInfo::Unknown)
+ Constant *Res = LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp,
+ /*UseBlockValue=*/true);
+ if (!Res)
return false;
++NumCmps;
- Constant *TorF =
- ConstantInt::get(CmpInst::makeCmpResultType(Op0->getType()), Result);
- Cmp->replaceAllUsesWith(TorF);
+ Cmp->replaceAllUsesWith(Res);
Cmp->eraseFromParent();
return true;
}
@@ -371,14 +360,15 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
{ // Scope for SwitchInstProfUpdateWrapper. It must not live during
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
+ unsigned ReachableCaseCount = 0;
for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
- LazyValueInfo::Tristate State =
+ auto *Res = dyn_cast_or_null<ConstantInt>(
LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
- /* UseBlockValue */ true);
+ /* UseBlockValue */ true));
- if (State == LazyValueInfo::False) {
+ if (Res && Res->isZero()) {
// This case never fires - remove it.
BasicBlock *Succ = CI->getCaseSuccessor();
Succ->removePredecessor(BB);
@@ -395,7 +385,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
continue;
}
- if (State == LazyValueInfo::True) {
+ if (Res && Res->isOne()) {
// This case always fires. Arrange for the switch to be turned into an
// unconditional branch by replacing the switch condition with the case
// value.
@@ -407,6 +397,31 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// Increment the case iterator since we didn't delete it.
++CI;
+ ++ReachableCaseCount;
+ }
+
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ if (ReachableCaseCount > 1 &&
+ !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())) {
+ ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0),
+ /*UndefAllowed*/ false);
+ // The default dest is unreachable if all cases are covered.
+ if (!CR.isSizeLargerThan(ReachableCaseCount)) {
+ BasicBlock *NewUnreachableBB =
+ BasicBlock::Create(BB->getContext(), "default.unreachable",
+ BB->getParent(), DefaultDest);
+ new UnreachableInst(BB->getContext(), NewUnreachableBB);
+
+ DefaultDest->removePredecessor(BB);
+ SI->setDefaultDest(NewUnreachableBB);
+
+ if (SuccessorsCount[DefaultDest] == 1)
+ DTU.applyUpdates({{DominatorTree::Delete, BB, DefaultDest}});
+ DTU.applyUpdates({{DominatorTree::Insert, BB, NewUnreachableBB}});
+
+ ++NumDeadCases;
+ Changed = true;
+ }
}
}
@@ -483,12 +498,8 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI);
// because it is negation-invariant.
static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
Value *X = II->getArgOperand(0);
- Type *Ty = X->getType();
- if (!Ty->isIntegerTy())
- return false;
-
bool IsIntMinPoison = cast<ConstantInt>(II->getArgOperand(1))->isOne();
- APInt IntMin = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+ APInt IntMin = APInt::getSignedMinValue(X->getType()->getScalarSizeInBits());
ConstantRange Range = LVI->getConstantRangeAtUse(
II->getOperandUse(0), /*UndefAllowed*/ IsIntMinPoison);
@@ -503,7 +514,7 @@ static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
// Is X in [IntMin, 0]? NOTE: INT_MIN is fine!
if (Range.getSignedMax().isNonPositive()) {
IRBuilder<> B(II);
- Value *NegX = B.CreateNeg(X, II->getName(), /*HasNUW=*/false,
+ Value *NegX = B.CreateNeg(X, II->getName(),
/*HasNSW=*/IsIntMinPoison);
++NumAbs;
II->replaceAllUsesWith(NegX);
@@ -527,18 +538,69 @@ static bool processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
return false;
}
+static bool processCmpIntrinsic(CmpIntrinsic *CI, LazyValueInfo *LVI) {
+ ConstantRange LHS_CR =
+ LVI->getConstantRangeAtUse(CI->getOperandUse(0), /*UndefAllowed*/ false);
+ ConstantRange RHS_CR =
+ LVI->getConstantRangeAtUse(CI->getOperandUse(1), /*UndefAllowed*/ false);
+
+ if (LHS_CR.icmp(CI->getGTPredicate(), RHS_CR)) {
+ ++NumCmpIntr;
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ CI->eraseFromParent();
+ return true;
+ }
+ if (LHS_CR.icmp(CI->getLTPredicate(), RHS_CR)) {
+ ++NumCmpIntr;
+ CI->replaceAllUsesWith(ConstantInt::getSigned(CI->getType(), -1));
+ CI->eraseFromParent();
+ return true;
+ }
+ if (LHS_CR.icmp(ICmpInst::ICMP_EQ, RHS_CR)) {
+ ++NumCmpIntr;
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
+ CI->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
// See if this min/max intrinsic always picks it's one specific operand.
+// If not, check whether we can canonicalize signed minmax into unsigned version
static bool processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(MM->getPredicate());
- LazyValueInfo::Tristate Result = LVI->getPredicateAt(
- Pred, MM->getLHS(), MM->getRHS(), MM, /*UseBlockValue=*/true);
- if (Result == LazyValueInfo::Unknown)
- return false;
+ ConstantRange LHS_CR = LVI->getConstantRangeAtUse(MM->getOperandUse(0),
+ /*UndefAllowed*/ false);
+ ConstantRange RHS_CR = LVI->getConstantRangeAtUse(MM->getOperandUse(1),
+ /*UndefAllowed*/ false);
+ if (LHS_CR.icmp(Pred, RHS_CR)) {
+ ++NumMinMax;
+ MM->replaceAllUsesWith(MM->getLHS());
+ MM->eraseFromParent();
+ return true;
+ }
+ if (RHS_CR.icmp(Pred, LHS_CR)) {
+ ++NumMinMax;
+ MM->replaceAllUsesWith(MM->getRHS());
+ MM->eraseFromParent();
+ return true;
+ }
- ++NumMinMax;
- MM->replaceAllUsesWith(MM->getOperand(!Result));
- MM->eraseFromParent();
- return true;
+ if (MM->isSigned() &&
+ ConstantRange::areInsensitiveToSignednessOfICmpPredicate(LHS_CR,
+ RHS_CR)) {
+ ++NumSMinMax;
+ IRBuilder<> B(MM);
+ MM->replaceAllUsesWith(B.CreateBinaryIntrinsic(
+ MM->getIntrinsicID() == Intrinsic::smin ? Intrinsic::umin
+ : Intrinsic::umax,
+ MM->getLHS(), MM->getRHS()));
+ MM->eraseFromParent();
+ return true;
+ }
+
+ return false;
}
// Rewrite this with.overflow intrinsic as non-overflowing.
@@ -573,7 +635,7 @@ static bool processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) {
bool NSW = SI->isSigned();
bool NUW = !SI->isSigned();
BinaryOperator *BinOp = BinaryOperator::Create(
- Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ Opcode, SI->getLHS(), SI->getRHS(), SI->getName(), SI->getIterator());
BinOp->setDebugLoc(SI->getDebugLoc());
setDeducedOverflowingFlags(BinOp, Opcode, NSW, NUW);
@@ -595,20 +657,22 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
return processAbsIntrinsic(&cast<IntrinsicInst>(CB), LVI);
}
+ if (auto *CI = dyn_cast<CmpIntrinsic>(&CB)) {
+ return processCmpIntrinsic(CI, LVI);
+ }
+
if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) {
return processMinMaxIntrinsic(MM, LVI);
}
if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
- if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
+ if (willNotOverflow(WO, LVI))
return processOverflowIntrinsic(WO, LVI);
- }
}
if (auto *SI = dyn_cast<SaturatingInst>(&CB)) {
- if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
+ if (willNotOverflow(SI, LVI))
return processSaturatingInst(SI, LVI);
- }
}
bool Changed = false;
@@ -643,11 +707,12 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
// relatively expensive analysis for constants which are obviously either
// null or non-null to start with.
if (Type && !CB.paramHasAttr(ArgNo, Attribute::NonNull) &&
- !isa<Constant>(V) &&
- LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
- ConstantPointerNull::get(Type), &CB,
- /*UseBlockValue=*/false) == LazyValueInfo::False)
- ArgNos.push_back(ArgNo);
+ !isa<Constant>(V))
+ if (auto *Res = dyn_cast_or_null<ConstantInt>(LVI->getPredicateAt(
+ ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), &CB,
+ /*UseBlockValue=*/false));
+ Res && Res->isZero())
+ ArgNos.push_back(ArgNo);
ArgNo++;
}
@@ -682,11 +747,10 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
const ConstantRange &RCR) {
assert(Instr->getOpcode() == Instruction::SDiv ||
Instr->getOpcode() == Instruction::SRem);
- assert(!Instr->getType()->isVectorTy());
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
- unsigned OrigWidth = Instr->getType()->getIntegerBitWidth();
+ unsigned OrigWidth = Instr->getType()->getScalarSizeInBits();
// What is the smallest bit width that can accommodate the entire value ranges
// of both of the operands?
@@ -709,7 +773,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
++NumSDivSRemsNarrowed;
IRBuilder<> B{Instr};
- auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
+ auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth);
auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
Instr->getName() + ".lhs.trunc");
auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
@@ -730,7 +794,6 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
Type *Ty = Instr->getType();
assert(Instr->getOpcode() == Instruction::UDiv ||
Instr->getOpcode() == Instruction::URem);
- assert(!Ty->isVectorTy());
bool IsRem = Instr->getOpcode() == Instruction::URem;
Value *X = Instr->getOperand(0);
@@ -788,9 +851,12 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
Value *FrozenX = X;
if (!isGuaranteedNotToBeUndef(X))
FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
- auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem");
- auto *Cmp =
- B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, Y, Instr->getName() + ".cmp");
+ Value *FrozenY = Y;
+ if (!isGuaranteedNotToBeUndef(Y))
+ FrozenY = B.CreateFreeze(Y, Y->getName() + ".frozen");
+ auto *AdjX = B.CreateNUWSub(FrozenX, FrozenY, Instr->getName() + ".urem");
+ auto *Cmp = B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, FrozenY,
+ Instr->getName() + ".cmp");
ExpandedOp = B.CreateSelect(Cmp, FrozenX, AdjX);
} else {
auto *Cmp =
@@ -810,7 +876,6 @@ static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
const ConstantRange &YCR) {
assert(Instr->getOpcode() == Instruction::UDiv ||
Instr->getOpcode() == Instruction::URem);
- assert(!Instr->getType()->isVectorTy());
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
@@ -823,12 +888,12 @@ static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
// NewWidth might be greater than OrigWidth if OrigWidth is not a power of
// two.
- if (NewWidth >= Instr->getType()->getIntegerBitWidth())
+ if (NewWidth >= Instr->getType()->getScalarSizeInBits())
return false;
++NumUDivURemsNarrowed;
IRBuilder<> B{Instr};
- auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth);
+ auto *TruncTy = Instr->getType()->getWithNewBitWidth(NewWidth);
auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy,
Instr->getName() + ".lhs.trunc");
auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy,
@@ -847,9 +912,6 @@ static bool narrowUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
assert(Instr->getOpcode() == Instruction::UDiv ||
Instr->getOpcode() == Instruction::URem);
- if (Instr->getType()->isVectorTy())
- return false;
-
ConstantRange XCR = LVI->getConstantRangeAtUse(Instr->getOperandUse(0),
/*UndefAllowed*/ false);
// Allow undef for RHS, as we can assume it is division by zero UB.
@@ -864,7 +926,6 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
const ConstantRange &RCR, LazyValueInfo *LVI) {
assert(SDI->getOpcode() == Instruction::SRem);
- assert(!SDI->getType()->isVectorTy());
if (LCR.abs().icmp(CmpInst::ICMP_ULT, RCR.abs())) {
SDI->replaceAllUsesWith(SDI->getOperand(0));
@@ -888,21 +949,22 @@ static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
for (Operand &Op : Ops) {
if (Op.D == Domain::NonNegative)
continue;
- auto *BO =
- BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg",
+ SDI->getIterator());
BO->setDebugLoc(SDI->getDebugLoc());
Op.V = BO;
}
- auto *URem =
- BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ auto *URem = BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(),
+ SDI->getIterator());
URem->setDebugLoc(SDI->getDebugLoc());
auto *Res = URem;
// If the divident was non-positive, we need to negate the result.
if (Ops[0].D == Domain::NonPositive) {
- Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg",
+ SDI->getIterator());
Res->setDebugLoc(SDI->getDebugLoc());
}
@@ -923,7 +985,6 @@ static bool processSRem(BinaryOperator *SDI, const ConstantRange &LCR,
static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
const ConstantRange &RCR, LazyValueInfo *LVI) {
assert(SDI->getOpcode() == Instruction::SDiv);
- assert(!SDI->getType()->isVectorTy());
// Check whether the division folds to a constant.
ConstantRange DivCR = LCR.sdiv(RCR);
@@ -949,14 +1010,14 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
for (Operand &Op : Ops) {
if (Op.D == Domain::NonNegative)
continue;
- auto *BO =
- BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI);
+ auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg",
+ SDI->getIterator());
BO->setDebugLoc(SDI->getDebugLoc());
Op.V = BO;
}
- auto *UDiv =
- BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI);
+ auto *UDiv = BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(),
+ SDI->getIterator());
UDiv->setDebugLoc(SDI->getDebugLoc());
UDiv->setIsExact(SDI->isExact());
@@ -964,7 +1025,8 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
// If the operands had two different domains, we need to negate the result.
if (Ops[0].D != Ops[1].D) {
- Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI);
+ Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg",
+ SDI->getIterator());
Res->setDebugLoc(SDI->getDebugLoc());
}
@@ -980,9 +1042,6 @@ static bool processSDiv(BinaryOperator *SDI, const ConstantRange &LCR,
static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
assert(Instr->getOpcode() == Instruction::SDiv ||
Instr->getOpcode() == Instruction::SRem);
- if (Instr->getType()->isVectorTy())
- return false;
-
ConstantRange LCR =
LVI->getConstantRangeAtUse(Instr->getOperandUse(0), /*AllowUndef*/ false);
// Allow undef for RHS, as we can assume it is division by zero UB.
@@ -1001,12 +1060,9 @@ static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
}
static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
- if (SDI->getType()->isVectorTy())
- return false;
-
ConstantRange LRange =
LVI->getConstantRangeAtUse(SDI->getOperandUse(0), /*UndefAllowed*/ false);
- unsigned OrigWidth = SDI->getType()->getIntegerBitWidth();
+ unsigned OrigWidth = SDI->getType()->getScalarSizeInBits();
ConstantRange NegOneOrZero =
ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
if (NegOneOrZero.contains(LRange)) {
@@ -1022,7 +1078,7 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
++NumAShrsConverted;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
- "", SDI);
+ "", SDI->getIterator());
BO->takeName(SDI);
BO->setDebugLoc(SDI->getDebugLoc());
BO->setIsExact(SDI->isExact());
@@ -1033,16 +1089,14 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
}
static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
- if (SDI->getType()->isVectorTy())
- return false;
-
const Use &Base = SDI->getOperandUse(0);
if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
.isAllNonNegative())
return false;
++NumSExt;
- auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "", SDI);
+ auto *ZExt = CastInst::CreateZExtOrBitCast(Base, SDI->getType(), "",
+ SDI->getIterator());
ZExt->takeName(SDI);
ZExt->setDebugLoc(SDI->getDebugLoc());
ZExt->setNonNeg();
@@ -1052,20 +1106,43 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
return true;
}
-static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
- if (ZExt->getType()->isVectorTy())
+static bool processPossibleNonNeg(PossiblyNonNegInst *I, LazyValueInfo *LVI) {
+ if (I->hasNonNeg())
return false;
- if (ZExt->hasNonNeg())
+ const Use &Base = I->getOperandUse(0);
+ if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
+ .isAllNonNegative())
return false;
- const Use &Base = ZExt->getOperandUse(0);
+ ++NumNNeg;
+ I->setNonNeg();
+
+ return true;
+}
+
+static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
+ return processPossibleNonNeg(cast<PossiblyNonNegInst>(ZExt), LVI);
+}
+
+static bool processUIToFP(UIToFPInst *UIToFP, LazyValueInfo *LVI) {
+ return processPossibleNonNeg(cast<PossiblyNonNegInst>(UIToFP), LVI);
+}
+
+static bool processSIToFP(SIToFPInst *SIToFP, LazyValueInfo *LVI) {
+ const Use &Base = SIToFP->getOperandUse(0);
if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
.isAllNonNegative())
return false;
- ++NumZExt;
- ZExt->setNonNeg();
+ ++NumSIToFP;
+ auto *UIToFP = CastInst::Create(Instruction::UIToFP, Base, SIToFP->getType(),
+ "", SIToFP->getIterator());
+ UIToFP->takeName(SIToFP);
+ UIToFP->setDebugLoc(SIToFP->getDebugLoc());
+ UIToFP->setNonNeg();
+ SIToFP->replaceAllUsesWith(UIToFP);
+ SIToFP->eraseFromParent();
return true;
}
@@ -1073,22 +1150,16 @@ static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
using OBO = OverflowingBinaryOperator;
- if (BinOp->getType()->isVectorTy())
- return false;
-
bool NSW = BinOp->hasNoSignedWrap();
bool NUW = BinOp->hasNoUnsignedWrap();
if (NSW && NUW)
return false;
Instruction::BinaryOps Opcode = BinOp->getOpcode();
- Value *LHS = BinOp->getOperand(0);
- Value *RHS = BinOp->getOperand(1);
-
- ConstantRange LRange =
- LVI->getConstantRange(LHS, BinOp, /*UndefAllowed*/ false);
- ConstantRange RRange =
- LVI->getConstantRange(RHS, BinOp, /*UndefAllowed*/ false);
+ ConstantRange LRange = LVI->getConstantRangeAtUse(BinOp->getOperandUse(0),
+ /*UndefAllowed=*/false);
+ ConstantRange RRange = LVI->getConstantRangeAtUse(BinOp->getOperandUse(1),
+ /*UndefAllowed=*/false);
bool Changed = false;
bool NewNUW = false, NewNSW = false;
@@ -1111,21 +1182,20 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
}
static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) {
- if (BinOp->getType()->isVectorTy())
- return false;
+ using namespace llvm::PatternMatch;
// Pattern match (and lhs, C) where C includes a superset of bits which might
// be set in lhs. This is a common truncation idiom created by instcombine.
const Use &LHS = BinOp->getOperandUse(0);
- ConstantInt *RHS = dyn_cast<ConstantInt>(BinOp->getOperand(1));
- if (!RHS || !RHS->getValue().isMask())
+ const APInt *RHS;
+ if (!match(BinOp->getOperand(1), m_LowBitMask(RHS)))
return false;
// We can only replace the AND with LHS based on range info if the range does
// not include undef.
ConstantRange LRange =
LVI->getConstantRangeAtUse(LHS, /*UndefAllowed=*/false);
- if (!LRange.getUnsignedMax().ule(RHS->getValue()))
+ if (!LRange.getUnsignedMax().ule(*RHS))
return false;
BinOp->replaceAllUsesWith(LHS);
@@ -1177,6 +1247,12 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
case Instruction::ZExt:
BBChanged |= processZExt(cast<ZExtInst>(&II), LVI);
break;
+ case Instruction::UIToFP:
+ BBChanged |= processUIToFP(cast<UIToFPInst>(&II), LVI);
+ break;
+ case Instruction::SIToFP:
+ BBChanged |= processSIToFP(cast<SIToFPInst>(&II), LVI);
+ break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1227,6 +1303,12 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!Changed) {
PA = PreservedAnalyses::all();
} else {
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT->verify(DominatorTree::VerificationLevel::Full));
+#else
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+#endif // EXPENSIVE_CHECKS
+
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LazyValueAnalysis>();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 85d4065286e4..4371b821eae6 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -65,6 +65,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
@@ -95,6 +96,11 @@ static cl::opt<bool>
cl::desc("View the CFG before DFA Jump Threading"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> EarlyExitHeuristic(
+ "dfa-early-exit-heuristic",
+ cl::desc("Exit early if an unpredictable value come from the same loop"),
+ cl::Hidden, cl::init(true));
+
static cl::opt<unsigned> MaxPathLength(
"dfa-max-path-length",
cl::desc("Max number of blocks searched to find a threading path"),
@@ -125,17 +131,18 @@ public:
explicit operator bool() const { return SI && SIUse; }
};
-void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
+void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
std::vector<BasicBlock *> *NewBBs);
class DFAJumpThreading {
public:
- DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT,
+ DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
- : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {}
+ : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
bool run(Function &F);
+ bool LoopInfoBroken;
private:
void
@@ -151,7 +158,7 @@ private:
std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
- unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs);
+ unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
// Put newly discovered select instructions into the work list.
for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
@@ -161,6 +168,7 @@ private:
AssumptionCache *AC;
DominatorTree *DT;
+ LoopInfo *LI;
TargetTransformInfo *TTI;
OptimizationRemarkEmitter *ORE;
};
@@ -194,7 +202,7 @@ void createBasicBlockAndSinkSelectInst(
/// created basic blocks into \p NewBBs.
///
/// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
-void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
+void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
std::vector<SelectInstToUnfold> *NewSIsToUnfold,
std::vector<BasicBlock *> *NewBBs) {
SelectInst *SI = SIToUnfold.getInst();
@@ -300,6 +308,12 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold,
DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
{DominatorTree::Insert, StartBlock, FT}});
+ // Preserve loop info
+ if (Loop *L = LI->getLoopFor(SI->getParent())) {
+ for (BasicBlock *NewBB : *NewBBs)
+ L->addBasicBlockToLoop(NewBB, *LI);
+ }
+
// The select is now dead.
assert(SI->use_empty() && "Select must be dead now");
SI->eraseFromParent();
@@ -378,7 +392,8 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
#endif
struct MainSwitch {
- MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) {
+ MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
+ : LI(LI) {
if (isCandidate(SI)) {
Instr = SI;
} else {
@@ -402,7 +417,7 @@ private:
///
/// Also, collect select instructions to unfold.
bool isCandidate(const SwitchInst *SI) {
- std::deque<Value *> Q;
+ std::deque<std::pair<Value *, BasicBlock *>> Q;
SmallSet<Value *, 16> SeenValues;
SelectInsts.clear();
@@ -411,22 +426,29 @@ private:
if (!isa<PHINode>(SICond))
return false;
- addToQueue(SICond, Q, SeenValues);
+ // The switch must be in a loop.
+ const Loop *L = LI->getLoopFor(SI->getParent());
+ if (!L)
+ return false;
+
+ addToQueue(SICond, nullptr, Q, SeenValues);
while (!Q.empty()) {
- Value *Current = Q.front();
+ Value *Current = Q.front().first;
+ BasicBlock *CurrentIncomingBB = Q.front().second;
Q.pop_front();
if (auto *Phi = dyn_cast<PHINode>(Current)) {
- for (Value *Incoming : Phi->incoming_values()) {
- addToQueue(Incoming, Q, SeenValues);
+ for (BasicBlock *IncomingBB : Phi->blocks()) {
+ Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
+ addToQueue(Incoming, IncomingBB, Q, SeenValues);
}
LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
} else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
if (!isValidSelectInst(SelI))
return false;
- addToQueue(SelI->getTrueValue(), Q, SeenValues);
- addToQueue(SelI->getFalseValue(), Q, SeenValues);
+ addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
+ addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
@@ -439,6 +461,18 @@ private:
// initial switch values that can be ignored (they will hit the
// unthreaded switch) but this assumption will get checked later after
// paths have been enumerated (in function getStateDefMap).
+
+ // If the unpredictable value comes from the same inner loop, it is
+ // likely that it will also be on the enumerated paths, causing us to
+ // exit after we have enumerated all the paths. This heuristic saves
+ // compile time because a search for all the paths can become expensive.
+ if (EarlyExitHeuristic &&
+ L->contains(LI->getLoopFor(CurrentIncomingBB))) {
+ LLVM_DEBUG(dbgs()
+ << "\tExiting early due to unpredictability heuristic.\n");
+ return false;
+ }
+
continue;
}
}
@@ -446,11 +480,12 @@ private:
return true;
}
- void addToQueue(Value *Val, std::deque<Value *> &Q,
+ void addToQueue(Value *Val, BasicBlock *BB,
+ std::deque<std::pair<Value *, BasicBlock *>> &Q,
SmallSet<Value *, 16> &SeenValues) {
if (SeenValues.contains(Val))
return;
- Q.push_back(Val);
+ Q.push_back({Val, BB});
SeenValues.insert(Val);
}
@@ -488,14 +523,16 @@ private:
return true;
}
+ LoopInfo *LI;
SwitchInst *Instr = nullptr;
SmallVector<SelectInstToUnfold, 4> SelectInsts;
};
struct AllSwitchPaths {
- AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE)
- : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()),
- ORE(ORE) {}
+ AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
+ LoopInfo *LI)
+ : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
+ LI(LI) {}
std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
unsigned getNumThreadingPaths() { return TPaths.size(); }
@@ -516,7 +553,7 @@ struct AllSwitchPaths {
return;
}
- for (PathType Path : LoopPaths) {
+ for (const PathType &Path : LoopPaths) {
ThreadingPath TPath;
const BasicBlock *PrevBB = Path.back();
@@ -567,6 +604,12 @@ private:
Visited.insert(BB);
+ // Stop if we have reached a BB outside the loop, since its successors have
+ // no impact on the DFA.
+ // TODO: Do we need to stop exploring if BB is the outer loop of the switch?
+ if (!LI->getLoopFor(BB))
+ return Res;
+
// Some blocks have multiple edges to the same successor, and this set
// is used to prevent a duplicate path from being generated
SmallSet<BasicBlock *, 4> Successors;
@@ -708,6 +751,7 @@ private:
BasicBlock *SwitchBlock;
OptimizationRemarkEmitter *ORE;
std::vector<ThreadingPath> TPaths;
+ LoopInfo *LI;
};
struct TransformDFA {
@@ -783,7 +827,8 @@ private:
return false;
}
- if (Metrics.convergent) {
+ // FIXME: Allow jump threading with controlled convergence.
+ if (Metrics.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
<< "convergent instructions.\n");
ORE->emit([&]() {
@@ -1254,6 +1299,7 @@ bool DFAJumpThreading::run(Function &F) {
SmallVector<AllSwitchPaths, 2> ThreadableLoops;
bool MadeChanges = false;
+ LoopInfoBroken = false;
for (BasicBlock &BB : F) {
auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
@@ -1262,7 +1308,7 @@ bool DFAJumpThreading::run(Function &F) {
LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
<< " is a candidate\n");
- MainSwitch Switch(SI, ORE);
+ MainSwitch Switch(SI, LI, ORE);
if (!Switch.getInstr())
continue;
@@ -1275,7 +1321,7 @@ bool DFAJumpThreading::run(Function &F) {
if (!Switch.getSelectInsts().empty())
MadeChanges = true;
- AllSwitchPaths SwitchPaths(&Switch, ORE);
+ AllSwitchPaths SwitchPaths(&Switch, ORE, LI);
SwitchPaths.run();
if (SwitchPaths.getNumThreadingPaths() > 0) {
@@ -1286,10 +1332,15 @@ bool DFAJumpThreading::run(Function &F) {
// strict requirement but it can cause buggy behavior if there is an
// overlap of blocks in different opportunities. There is a lot of room to
// experiment with catching more opportunities here.
+ // NOTE: To relax this constraint, we must handle LoopInfo invalidation
break;
}
}
+#ifdef NDEBUG
+ LI->verify(*DT);
+#endif
+
SmallPtrSet<const Value *, 32> EphValues;
if (ThreadableLoops.size() > 0)
CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
@@ -1298,6 +1349,7 @@ bool DFAJumpThreading::run(Function &F) {
TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
Transform.run();
MadeChanges = true;
+ LoopInfoBroken = true;
}
#ifdef EXPENSIVE_CHECKS
@@ -1315,13 +1367,16 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
OptimizationRemarkEmitter ORE(&F);
-
- if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F))
+ DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
+ if (!ThreadImpl.run(F))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
+ if (!ThreadImpl.LoopInfoBroken)
+ PA.preserve<LoopAnalysis>();
return PA;
}
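The isCandidate() change above carries the incoming basic block alongside each value in the exploration queue, so the new early-exit heuristic can ask which loop an unpredictable value flows in from. A minimal standalone sketch of that queue-of-pairs pattern, written in plain C++ with hypothetical Val/Block stand-ins rather than the LLVM types:

// Standalone sketch (not LLVM code): walking PHI-like operands while
// carrying the block each value came through, mirroring the new
// std::deque<std::pair<Value *, BasicBlock *>> queue in isCandidate().
#include <deque>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct Block; // hypothetical stand-ins for llvm::Value / llvm::BasicBlock
struct Val {
  std::string Name;
  // For a "phi", each operand remembers which block it flows in from.
  std::vector<std::pair<Val *, Block *>> Incoming;
};
struct Block { std::string Name; };

// Visit every value reachable from Root, remembering the block each one
// was reached through; the seen-set keeps each value to a single visit.
void walk(Val *Root) {
  std::deque<std::pair<Val *, Block *>> Q;
  std::set<Val *> Seen;
  auto AddToQueue = [&](Val *V, Block *From) {
    if (Seen.insert(V).second)
      Q.push_back({V, From});
  };
  AddToQueue(Root, nullptr);
  while (!Q.empty()) {
    auto [Cur, From] = Q.front();
    Q.pop_front();
    std::cout << Cur->Name << " via "
              << (From ? From->Name : std::string("<entry>")) << "\n";
    for (auto &[Op, B] : Cur->Incoming)
      AddToQueue(Op, B); // operands carry their own incoming block
  }
}

int main() {
  Block B1{"b1"}, B2{"b2"};
  Val X{"x", {}}, Y{"y", {}};
  Val Phi{"phi", {{&X, &B1}, {&Y, &B2}}};
  walk(&Phi);
}

As in addToQueue() above, the seen-set is keyed on the value alone, so a value is only ever enqueued with the first incoming block through which it is reached.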
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f0f0f5f28025..931606c6f8fe 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -484,7 +484,7 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
uint64_t NewSizeInBits, bool IsOverwriteEnd) {
- const DataLayout &DL = Inst->getModule()->getDataLayout();
+ const DataLayout &DL = Inst->getDataLayout();
uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
uint64_t DeadSliceOffsetInBits =
OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
@@ -526,7 +526,8 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
// returned by getAssignmentMarkers so save a copy of the markers to iterate
// over.
auto LinkedRange = at::getAssignmentMarkers(Inst);
- SmallVector<DPValue *> LinkedDPVAssigns = at::getDPVAssignmentMarkers(Inst);
+ SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
+ at::getDVRAssignmentMarkers(Inst);
SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
LinkedRange.end());
auto InsertAssignForOverlap = [&](auto *Assign) {
@@ -554,7 +555,7 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
NewAssign->setKillAddress();
};
for_each(Linked, InsertAssignForOverlap);
- for_each(LinkedDPVAssigns, InsertAssignForOverlap);
+ for_each(LinkedDVRAssigns, InsertAssignForOverlap);
}
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
@@ -634,7 +635,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
Value *Indices[1] = {
ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
- Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "", DeadI);
+ Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
+ DeadI->getIterator());
NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
DeadIntrinsic->setDest(NewDestGEP);
}
@@ -868,7 +870,7 @@ struct DSEState {
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
: F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
- PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
+ PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@@ -900,6 +902,16 @@ struct DSEState {
});
}
+ static void pushMemUses(MemoryAccess *Acc,
+ SmallVectorImpl<MemoryAccess *> &WorkList,
+ SmallPtrSetImpl<MemoryAccess *> &Visited) {
+ for (Use &U : Acc->uses()) {
+ auto *MA = cast<MemoryAccess>(U.getUser());
+ if (Visited.insert(MA).second)
+ WorkList.push_back(MA);
+ }
+ };
+
LocationSize strengthenLocationSize(const Instruction *I,
LocationSize Size) const {
if (auto *CB = dyn_cast<CallBase>(I)) {
@@ -1155,26 +1167,14 @@ struct DSEState {
}
/// Returns true if \p Def is not read before returning from the function.
- bool isWriteAtEndOfFunction(MemoryDef *Def) {
+ bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
<< *Def->getMemoryInst()
<< ") is at the end the function \n");
-
- auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
- if (!MaybeLoc) {
- LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
- return false;
- }
-
SmallVector<MemoryAccess *, 4> WorkList;
SmallPtrSet<MemoryAccess *, 8> Visited;
- auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
- if (!Visited.insert(Acc).second)
- return;
- for (Use &U : Acc->uses())
- WorkList.push_back(cast<MemoryAccess>(U.getUser()));
- };
- PushMemUses(Def);
+
+ pushMemUses(Def, WorkList, Visited);
for (unsigned I = 0; I < WorkList.size(); I++) {
if (WorkList.size() >= MemorySSAScanLimit) {
LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
@@ -1186,22 +1186,22 @@ struct DSEState {
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
// memory location.
- if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
+ if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
return false;
- PushMemUses(cast<MemoryPhi>(UseAccess));
+ pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
continue;
}
// TODO: Checking for aliasing is expensive. Consider reducing the amount
// of times this is called and/or caching it.
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
- if (isReadClobber(*MaybeLoc, UseInst)) {
+ if (isReadClobber(DefLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
return false;
}
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
- PushMemUses(UseDef);
+ pushMemUses(UseDef, WorkList, Visited);
}
return true;
}
@@ -1503,12 +1503,9 @@ struct DSEState {
LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
<< *MaybeDeadI << ")\n");
- SmallSetVector<MemoryAccess *, 32> WorkList;
- auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
- for (Use &U : Acc->uses())
- WorkList.insert(cast<MemoryAccess>(U.getUser()));
- };
- PushMemUses(MaybeDeadAccess);
+ SmallVector<MemoryAccess *, 32> WorkList;
+ SmallPtrSet<MemoryAccess *, 32> Visited;
+ pushMemUses(MaybeDeadAccess, WorkList, Visited);
// Check if DeadDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
@@ -1532,7 +1529,7 @@ struct DSEState {
continue;
}
LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
- PushMemUses(UseAccess);
+ pushMemUses(UseAccess, WorkList, Visited);
continue;
}
@@ -1557,7 +1554,7 @@ struct DSEState {
if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
- PushMemUses(UseAccess);
+ pushMemUses(UseAccess, WorkList, Visited);
continue;
}
@@ -1616,7 +1613,7 @@ struct DSEState {
return std::nullopt;
}
} else
- PushMemUses(UseDef);
+ pushMemUses(UseDef, WorkList, Visited);
}
}
@@ -1819,8 +1816,11 @@ struct DSEState {
Instruction *DefI = Def->getMemoryInst();
auto DefLoc = getLocForWrite(DefI);
- if (!DefLoc || !isRemovable(DefI))
+ if (!DefLoc || !isRemovable(DefI)) {
+ LLVM_DEBUG(dbgs() << " ... could not get location for write or "
+ "instruction not removable.\n");
continue;
+ }
// NOTE: Currently eliminating writes at the end of a function is
// limited to MemoryDefs with a single underlying object, to save
@@ -1831,7 +1831,7 @@ struct DSEState {
if (!isInvisibleToCallerAfterRet(UO))
continue;
- if (isWriteAtEndOfFunction(Def)) {
+ if (isWriteAtEndOfFunction(Def, *DefLoc)) {
// See through pointer-to-pointer bitcasts
LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
"of the function\n");
@@ -1923,6 +1923,57 @@ struct DSEState {
return true;
}
+ // Check if there is a dominating condition that implies that the value
+ // being stored in a ptr is already present in the ptr.
+ bool dominatingConditionImpliesValue(MemoryDef *Def) {
+ auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
+ BasicBlock *StoreBB = StoreI->getParent();
+ Value *StorePtr = StoreI->getPointerOperand();
+ Value *StoreVal = StoreI->getValueOperand();
+
+ DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
+ if (!IDom)
+ return false;
+
+ auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
+
+ // In case both blocks are the same, it is not possible to determine
+ // if optimization is possible. (We would not want to optimize a store
+ // in the FalseBB if condition is true and vice versa.)
+ if (BI->getSuccessor(0) == BI->getSuccessor(1))
+ return false;
+
+ Instruction *ICmpL;
+ ICmpInst::Predicate Pred;
+ if (!match(BI->getCondition(),
+ m_c_ICmp(Pred,
+ m_CombineAnd(m_Load(m_Specific(StorePtr)),
+ m_Instruction(ICmpL)),
+ m_Specific(StoreVal))) ||
+ !ICmpInst::isEquality(Pred))
+ return false;
+
+ // In case the else block also branches to the if block, or the other way
+ // around, it is not possible to determine if the optimization is possible.
+ if (Pred == ICmpInst::ICMP_EQ &&
+ !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
+ StoreBB))
+ return false;
+
+ if (Pred == ICmpInst::ICMP_NE &&
+ !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
+ StoreBB))
+ return false;
+
+ MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
+ MemoryAccess *ClobAcc =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
+
+ return MSSA.dominates(ClobAcc, LoadAcc);
+ }
+
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
@@ -1953,6 +2004,9 @@ struct DSEState {
if (!Store)
return false;
+ if (dominatingConditionImpliesValue(Def))
+ return true;
+
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
if (LoadI->getPointerOperand() == Store->getOperand(1)) {
// Get the defining access for the load.
@@ -2053,10 +2107,12 @@ struct DSEState {
if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
// MemSetInst must have a write location.
- MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
+ auto UpperLoc = getLocForWrite(UpperInst);
+ if (!UpperLoc)
+ return false;
int64_t InstWriteOffset = 0;
int64_t DepWriteOffset = 0;
- auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+ auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
InstWriteOffset, DepWriteOffset);
Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
return StoredByte && StoredByte == MemSetI->getOperand(1) &&
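The DeadStoreElimination hunks replace several ad-hoc PushMemUses lambdas with a single pushMemUses() helper that pairs an explicit worklist with a visited set. A small standalone sketch of that shape, using a hypothetical Node type instead of MemorySSA accesses:

// Standalone sketch (not LLVM code): the worklist-plus-visited-set shape
// that the new DSEState::pushMemUses() helper factors out.
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

struct Node {
  std::string Name;
  std::vector<Node *> Users;
};

// Push every user of Acc that has not been visited yet; the visited set,
// not the worklist, is what guarantees each access is processed once.
static void pushUses(Node *Acc, std::vector<Node *> &WorkList,
                     std::unordered_set<Node *> &Visited) {
  for (Node *U : Acc->Users)
    if (Visited.insert(U).second)
      WorkList.push_back(U);
}

int main() {
  Node A{"a"}, B{"b"}, C{"c"};
  A.Users = {&B, &C};
  B.Users = {&C}; // C is reachable twice but visited once
  std::vector<Node *> WorkList;
  std::unordered_set<Node *> Visited;
  pushUses(&A, WorkList, Visited);
  // Index-based loop because pushUses may grow the worklist while we scan.
  for (size_t I = 0; I < WorkList.size(); ++I) {
    std::cout << WorkList[I]->Name << "\n";
    pushUses(WorkList[I], WorkList, Visited);
  }
}

Keeping the visited set separate from the worklist lets the index-based loop keep appending while it scans, without reprocessing an access that two different paths reach.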
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index 57d3f312186e..d8aea1e810e9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -215,6 +215,7 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
RemInst = RealRem;
// And replace the original instruction with the new one.
OrigRemInst->replaceAllUsesWith(RealRem);
+ RealRem->setDebugLoc(OrigRemInst->getDebugLoc());
OrigRemInst->eraseFromParent();
NumRecomposed++;
// Note that we have left ((X / Y) * Y) around.
@@ -366,7 +367,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivDominates)
DivInst->moveBefore(RemInst);
Mul->insertAfter(RemInst);
+ Mul->setDebugLoc(RemInst->getDebugLoc());
Sub->insertAfter(Mul);
+ Sub->setDebugLoc(RemInst->getDebugLoc());
// If DivInst has the exact flag, remove it. Otherwise this optimization
// may replace a well-defined value 'X % Y' with poison.
@@ -381,16 +384,19 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// %mul = mul %div, 1 // %mul = undef
// %rem = sub %x, %mul // %rem = undef - undef = undef
// If X is not frozen, %rem becomes undef after transformation.
- // TODO: We need a undef-specific checking function in ValueTracking
- if (!isGuaranteedNotToBeUndefOrPoison(X, nullptr, DivInst, &DT)) {
- auto *FrX = new FreezeInst(X, X->getName() + ".frozen", DivInst);
+ if (!isGuaranteedNotToBeUndef(X, nullptr, DivInst, &DT)) {
+ auto *FrX =
+ new FreezeInst(X, X->getName() + ".frozen", DivInst->getIterator());
+ FrX->setDebugLoc(DivInst->getDebugLoc());
DivInst->setOperand(0, FrX);
Sub->setOperand(0, FrX);
}
// Same for Y. If X = 1 and Y = (undef | 1), %rem in src is either 1 or 0,
// but %rem in tgt can be one of many integer values.
- if (!isGuaranteedNotToBeUndefOrPoison(Y, nullptr, DivInst, &DT)) {
- auto *FrY = new FreezeInst(Y, Y->getName() + ".frozen", DivInst);
+ if (!isGuaranteedNotToBeUndef(Y, nullptr, DivInst, &DT)) {
+ auto *FrY =
+ new FreezeInst(Y, Y->getName() + ".frozen", DivInst->getIterator());
+ FrY->setDebugLoc(DivInst->getDebugLoc());
DivInst->setOperand(1, FrY);
Mul->setOperand(1, FrY);
}
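The DivRemPairs hunks adjust how the pass recomposes a remainder from an existing division (freezing operands with isGuaranteedNotToBeUndef and propagating debug locations). The rewrite itself rests on a simple identity; a standalone sketch, not LLVM code, checking it on a few signed samples:

// Standalone sketch (not LLVM code): the arithmetic identity DivRemPairs
// relies on when it rewrites a remainder as X - (X / Y) * Y, so that one
// hardware divide can feed both the quotient and the remainder.
#include <cassert>
#include <cstdio>

int main() {
  // Holds for truncating signed division whenever Y != 0 and the division
  // does not overflow (i.e. not X == INT_MIN with Y == -1).
  int Samples[][2] = {{7, 3}, {-7, 3}, {7, -3}, {-7, -3}, {42, 5}};
  for (auto &S : Samples) {
    int X = S[0], Y = S[1];
    int Div = X / Y;
    int Recomposed = X - Div * Y; // what DivRemPairs emits as mul + sub
    assert(Recomposed == X % Y);
    std::printf("%d %% %d = %d (recomposed %d)\n", X, Y, X % Y, Recomposed);
  }
}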
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index f736d429cb63..cf11f5bc885a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1833,7 +1833,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto *MSSA =
UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
- EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+ EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -1887,7 +1887,7 @@ public:
auto *MSSA =
UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
- EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+ EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
return CSE.run();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index ad2041cd4253..213d0f389c2e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -21,7 +21,7 @@
using namespace llvm;
-#define DEBUG_TYPE "flattencfg"
+#define DEBUG_TYPE "flatten-cfg"
namespace {
struct FlattenCFGLegacyPass : public FunctionPass {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
index ccca8bcc1a56..a4a1438dbe41 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -311,7 +311,7 @@ void Float2IntPass::walkForwards() {
}
// If there is a valid transform to be done, do it.
-bool Float2IntPass::validateAndTransform() {
+bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
bool MadeChange = false;
// Iterate over every disjoint partition of the def-use graph.
@@ -359,9 +359,7 @@ bool Float2IntPass::validateAndTransform() {
// The number of bits required is the maximum of the upper and
// lower limits, plus one so it can be signed.
- unsigned MinBW = std::max(R.getLower().getSignificantBits(),
- R.getUpper().getSignificantBits()) +
- 1;
+ unsigned MinBW = R.getMinSignedBits() + 1;
LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
// If we've run off the realms of the exactly representable integers,
@@ -376,15 +374,23 @@ bool Float2IntPass::validateAndTransform() {
LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
continue;
}
- if (MinBW > 64) {
- LLVM_DEBUG(
- dbgs() << "F2I: Value requires more than 64 bits to represent!\n");
- continue;
- }
- // OK, R is known to be representable. Now pick a type for it.
- // FIXME: Pick the smallest legal type that will fit.
- Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx);
+ // OK, R is known to be representable.
+ // Pick the smallest legal type that will fit.
+ Type *Ty = DL.getSmallestLegalIntType(*Ctx, MinBW);
+ if (!Ty) {
+ // Every supported target supports 64-bit and 32-bit integers,
+ // so fall back to a 32- or 64-bit integer if the value fits.
+ if (MinBW <= 32) {
+ Ty = Type::getInt32Ty(*Ctx);
+ } else if (MinBW <= 64) {
+ Ty = Type::getInt64Ty(*Ctx);
+ } else {
+ LLVM_DEBUG(dbgs() << "F2I: Value requires more bits to represent than "
+ "the target supports!\n");
+ continue;
+ }
+ }
for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
MI != ME; ++MI)
@@ -491,7 +497,8 @@ bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
walkBackwards();
walkForwards();
- bool Modified = validateAndTransform();
+ const DataLayout &DL = F.getDataLayout();
+ bool Modified = validateAndTransform(DL);
if (Modified)
cleanup();
return Modified;
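The Float2Int change above stops hard-coding i32/i64 and instead asks the DataLayout for the smallest legal integer type that fits MinBW, falling back to 32 or 64 bits only when no legal type is wide enough. A standalone sketch of that selection order, with a hypothetical LegalWidths list standing in for the DataLayout query:

// Standalone sketch (not LLVM code): prefer the smallest integer width the
// target declares legal, otherwise fall back to 32 or 64 bits, otherwise
// give up. LegalWidths is a hypothetical stand-in for the DataLayout.
#include <algorithm>
#include <iostream>
#include <optional>
#include <vector>

std::optional<unsigned> pickIntWidth(unsigned MinBW,
                                     std::vector<unsigned> LegalWidths) {
  std::sort(LegalWidths.begin(), LegalWidths.end());
  // Smallest legal width that can hold MinBW bits.
  for (unsigned W : LegalWidths)
    if (W >= MinBW)
      return W;
  // Fallback mirroring the patch: assume i32/i64 are always available.
  if (MinBW <= 32)
    return 32u;
  if (MinBW <= 64)
    return 64u;
  return std::nullopt; // needs more bits than the target supports
}

int main() {
  std::vector<unsigned> Legal = {16, 32, 64};
  std::cout << *pickIntWidth(11, Legal) << "\n";             // 16
  std::cout << *pickIntWidth(33, Legal) << "\n";             // 64
  std::cout << pickIntWidth(70, Legal).has_value() << "\n";  // 0
}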
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
index e36578f3de7a..db39d8621d07 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -419,7 +420,7 @@ GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
Expression E;
Type *PtrTy = GEP->getType()->getScalarType();
- const DataLayout &DL = GEP->getModule()->getDataLayout();
+ const DataLayout &DL = GEP->getDataLayout();
unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
MapVector<Value *, APInt> VariableOffsets;
APInt ConstantOffset(BitWidth, 0);
@@ -725,6 +726,69 @@ void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
}
//===----------------------------------------------------------------------===//
+// LeaderMap External Functions
+//===----------------------------------------------------------------------===//
+
+/// Push a new Value to the LeaderTable onto the list for its value number.
+void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
+ LeaderListNode &Curr = NumToLeaders[N];
+ if (!Curr.Entry.Val) {
+ Curr.Entry.Val = V;
+ Curr.Entry.BB = BB;
+ return;
+ }
+
+ LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
+ Node->Entry.Val = V;
+ Node->Entry.BB = BB;
+ Node->Next = Curr.Next;
+ Curr.Next = Node;
+}
+
+/// Scan the list of values corresponding to a given
+/// value number, and remove the given instruction if encountered.
+void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
+ const BasicBlock *BB) {
+ LeaderListNode *Prev = nullptr;
+ LeaderListNode *Curr = &NumToLeaders[N];
+
+ while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
+ Prev = Curr;
+ Curr = Curr->Next;
+ }
+
+ if (!Curr)
+ return;
+
+ if (Prev) {
+ Prev->Next = Curr->Next;
+ } else {
+ if (!Curr->Next) {
+ Curr->Entry.Val = nullptr;
+ Curr->Entry.BB = nullptr;
+ } else {
+ LeaderListNode *Next = Curr->Next;
+ Curr->Entry.Val = Next->Entry.Val;
+ Curr->Entry.BB = Next->Entry.BB;
+ Curr->Next = Next->Next;
+ }
+ }
+}
+
+void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
+ // Walk through the value number scope to make sure the instruction isn't
+ // ferreted away in it.
+ for (const auto &I : NumToLeaders) {
+ (void)I;
+ assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
+ assert(
+ std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
+ [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
+ "Inst still in value numbering scope!");
+ }
+}
+
+//===----------------------------------------------------------------------===//
// GVN Pass
//===----------------------------------------------------------------------===//
@@ -1008,7 +1072,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
GVNPass &gvn) const {
Value *Res;
Type *LoadTy = Load->getType();
- const DataLayout &DL = Load->getModule()->getDataLayout();
+ const DataLayout &DL = Load->getDataLayout();
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
@@ -1056,7 +1120,8 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
// Introduce a new value select for a load from an eligible pointer select.
SelectInst *Sel = getSelectValue();
assert(V1 && V2 && "both value operands of the select must be present");
- Res = SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel);
+ Res =
+ SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator());
} else {
llvm_unreachable("Should not materialize value from dead block");
}
@@ -1173,7 +1238,7 @@ GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
Instruction *DepInst = DepInfo.getInst();
- const DataLayout &DL = Load->getModule()->getDataLayout();
+ const DataLayout &DL = Load->getDataLayout();
if (DepInfo.isClobber()) {
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
@@ -1412,10 +1477,10 @@ void GVNPass::eliminatePartiallyRedundantLoad(
BasicBlock *UnavailableBlock = AvailableLoad.first;
Value *LoadPtr = AvailableLoad.second;
- auto *NewLoad =
- new LoadInst(Load->getType(), LoadPtr, Load->getName() + ".pre",
- Load->isVolatile(), Load->getAlign(), Load->getOrdering(),
- Load->getSyncScopeID(), UnavailableBlock->getTerminator());
+ auto *NewLoad = new LoadInst(
+ Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
+ Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
+ UnavailableBlock->getTerminator()->getIterator());
NewLoad->setDebugLoc(Load->getDebugLoc());
if (MSSAU) {
auto *NewAccess = MSSAU->createMemoryAccessInBB(
@@ -1465,7 +1530,7 @@ void GVNPass::eliminatePartiallyRedundantLoad(
OldLoad->replaceAllUsesWith(NewLoad);
replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
if (uint32_t ValNo = VN.lookup(OldLoad, false))
- removeFromLeaderTable(ValNo, OldLoad, OldLoad->getParent());
+ LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent());
VN.erase(OldLoad);
removeInstruction(OldLoad);
}
@@ -1658,7 +1723,7 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
- const DataLayout &DL = Load->getModule()->getDataLayout();
+ const DataLayout &DL = Load->getDataLayout();
SmallVector<Instruction*, 8> NewInsts;
for (auto &PredLoad : PredLoads) {
BasicBlock *UnavailablePred = PredLoad.first;
@@ -1994,8 +2059,9 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
// Insert a new store to null instruction before the load to indicate that
// this code is not reachable. FIXME: We could insert unreachable
// instruction directly because we can modify the CFG.
- auto *NewS = new StoreInst(PoisonValue::get(Int8Ty),
- Constant::getNullValue(PtrTy), IntrinsicI);
+ auto *NewS =
+ new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy),
+ IntrinsicI->getIterator());
if (MSSAU) {
const MemoryUseOrDef *FirstNonDom = nullptr;
const auto *AL =
@@ -2201,10 +2267,9 @@ GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
/// defined in \p BB.
bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
GVNPass &Gvn) {
- LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
- while (Vals && Vals->BB == BB)
- Vals = Vals->Next;
- return !Vals;
+ return all_of(
+ Gvn.LeaderTable.getLeaders(Num),
+ [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
}
/// Wrap phiTranslateImpl to provide caching functionality.
@@ -2226,12 +2291,11 @@ bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
const BasicBlock *PhiBlock,
GVNPass &Gvn) {
CallInst *Call = nullptr;
- LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
- while (Vals) {
- Call = dyn_cast<CallInst>(Vals->Val);
+ auto Leaders = Gvn.LeaderTable.getLeaders(Num);
+ for (const auto &Entry : Leaders) {
+ Call = dyn_cast<CallInst>(Entry.Val);
if (Call && Call->getParent() == PhiBlock)
break;
- Vals = Vals->Next;
}
if (AA->doesNotAccessMemory(Call))
@@ -2324,23 +2388,17 @@ void GVNPass::ValueTable::eraseTranslateCacheEntry(
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
- LeaderTableEntry Vals = LeaderTable[num];
- if (!Vals.Val) return nullptr;
+ auto Leaders = LeaderTable.getLeaders(num);
+ if (Leaders.empty())
+ return nullptr;
Value *Val = nullptr;
- if (DT->dominates(Vals.BB, BB)) {
- Val = Vals.Val;
- if (isa<Constant>(Val)) return Val;
- }
-
- LeaderTableEntry* Next = Vals.Next;
- while (Next) {
- if (DT->dominates(Next->BB, BB)) {
- if (isa<Constant>(Next->Val)) return Next->Val;
- if (!Val) Val = Next->Val;
+ for (const auto &Entry : Leaders) {
+ if (DT->dominates(Entry.BB, BB)) {
+ Val = Entry.Val;
+ if (isa<Constant>(Val))
+ return Val;
}
-
- Next = Next->Next;
}
return Val;
@@ -2417,6 +2475,10 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
std::swap(LHS, RHS);
assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
+ const DataLayout &DL =
+ isa<Argument>(LHS)
+ ? cast<Argument>(LHS)->getParent()->getDataLayout()
+ : cast<Instruction>(LHS)->getDataLayout();
// If there is no obvious reason to prefer the left-hand side over the
// right-hand side, ensure the longest lived term is on the right-hand side,
@@ -2443,23 +2505,32 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
// using the leader table is about compiling faster, not optimizing better).
// The leader table only tracks basic blocks, not edges. Only add to if we
// have the simple case where the edge dominates the end.
- if (RootDominatesEnd && !isa<Instruction>(RHS))
- addToLeaderTable(LVN, RHS, Root.getEnd());
+ if (RootDominatesEnd && !isa<Instruction>(RHS) &&
+ canReplacePointersIfEqual(LHS, RHS, DL))
+ LeaderTable.insert(LVN, RHS, Root.getEnd());
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
// never do anything if LHS has only one use.
if (!LHS->hasOneUse()) {
+ // Create a callback that captures the DL.
+ auto canReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
+ return canReplacePointersInUseIfEqual(U, To, DL);
+ };
unsigned NumReplacements =
DominatesByEdge
- ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
- : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());
-
- Changed |= NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
- // Cached information for anything that uses LHS will be invalid.
- if (MD)
- MD->invalidateCachedPointerInfo(LHS);
+ ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root,
+ canReplacePointersCallBack)
+ : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(),
+ canReplacePointersCallBack);
+
+ if (NumReplacements > 0) {
+ Changed = true;
+ NumGVNEqProp += NumReplacements;
+ // Cached information for anything that uses LHS will be invalid.
+ if (MD)
+ MD->invalidateCachedPointerInfo(LHS);
+ }
}
// Now try to deduce additional equalities from this one. For example, if
@@ -2530,7 +2601,7 @@ bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
// The leader table only tracks basic blocks, not edges. Only add to if we
// have the simple case where the edge dominates the end.
if (RootDominatesEnd)
- addToLeaderTable(Num, NotVal, Root.getEnd());
+ LeaderTable.insert(Num, NotVal, Root.getEnd());
continue;
}
@@ -2550,7 +2621,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- const DataLayout &DL = I->getModule()->getDataLayout();
+ const DataLayout &DL = I->getDataLayout();
if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
bool Changed = false;
if (!I->use_empty()) {
@@ -2580,7 +2651,7 @@ bool GVNPass::processInstruction(Instruction *I) {
return true;
unsigned Num = VN.lookupOrAdd(Load);
- addToLeaderTable(Num, Load, Load->getParent());
+ LeaderTable.insert(Num, Load, Load->getParent());
return false;
}
@@ -2622,8 +2693,8 @@ bool GVNPass::processInstruction(Instruction *I) {
// Remember how many outgoing edges there are to every successor.
SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
- for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
- ++SwitchEdges[SI->getSuccessor(i)];
+ for (BasicBlock *Succ : successors(Parent))
+ ++SwitchEdges[Succ];
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
@@ -2648,7 +2719,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// Allocations are always uniquely numbered, so we can save time and memory
// by fast failing them.
if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
- addToLeaderTable(Num, I, I->getParent());
+ LeaderTable.insert(Num, I, I->getParent());
return false;
}
@@ -2656,7 +2727,7 @@ bool GVNPass::processInstruction(Instruction *I) {
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
if (Num >= NextNum) {
- addToLeaderTable(Num, I, I->getParent());
+ LeaderTable.insert(Num, I, I->getParent());
return false;
}
@@ -2665,7 +2736,7 @@ bool GVNPass::processInstruction(Instruction *I) {
Value *Repl = findLeader(I->getParent(), Num);
if (!Repl) {
// Failure, just remember this instance for future use.
- addToLeaderTable(Num, I, I->getParent());
+ LeaderTable.insert(Num, I, I->getParent());
return false;
}
@@ -2706,7 +2777,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
bool Changed = false;
bool ShouldContinue = true;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
@@ -2716,6 +2787,7 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
Changed |= removedBlock;
}
+ DTU.flush();
unsigned Iteration = 0;
while (ShouldContinue) {
@@ -2859,7 +2931,7 @@ bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
VN.add(Instr, Num);
// Update the availability map to include the new instruction.
- addToLeaderTable(Num, Instr, Pred);
+ LeaderTable.insert(Num, Instr, Pred);
return true;
}
@@ -3010,13 +3082,13 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
// After creating a new PHI for ValNo, the phi translate result for ValNo will
// be changed, so erase the related stale entries in phi translate cache.
VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
- addToLeaderTable(ValNo, Phi, CurrentBlock);
+ LeaderTable.insert(ValNo, Phi, CurrentBlock);
Phi->setDebugLoc(CurInst->getDebugLoc());
CurInst->replaceAllUsesWith(Phi);
if (MD && Phi->getType()->isPtrOrPtrVectorTy())
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
- removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
+ LeaderTable.erase(ValNo, CurInst, CurrentBlock);
LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
removeInstruction(CurInst);
@@ -3110,7 +3182,6 @@ void GVNPass::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
BlockRPONumber.clear();
- TableAllocator.Reset();
ICF->clear();
InvalidBlockRPONumbers = true;
}
@@ -3130,18 +3201,7 @@ void GVNPass::removeInstruction(Instruction *I) {
/// internal data structures.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
-
- // Walk through the value number scope to make sure the instruction isn't
- // ferreted away in it.
- for (const auto &I : LeaderTable) {
- const LeaderTableEntry *Node = &I.second;
- assert(Node->Val != Inst && "Inst still in value numbering scope!");
-
- while (Node->Next) {
- Node = Node->Next;
- assert(Node->Val != Inst && "Inst still in value numbering scope!");
- }
- }
+ LeaderTable.verifyRemoved(Inst);
}
/// BB is declared dead, which implied other blocks become dead as well. This
@@ -3268,7 +3328,7 @@ void GVNPass::assignValNumForDeadCode() {
for (BasicBlock *BB : DeadBlocks) {
for (Instruction &Inst : *BB) {
unsigned ValNum = VN.lookupOrAdd(&Inst);
- addToLeaderTable(ValNum, &Inst, BB);
+ LeaderTable.insert(ValNum, &Inst, BB);
}
}
}
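The GVN hunks fold the old free-standing leader-table helpers into a LeaderMap class whose insert and erase walk a per-value-number list with an inline head entry. A standalone sketch of that data-structure shape, using plain strings and new/delete where the real code hands out nodes from a bump allocator:

// Standalone sketch (not LLVM code): a per-value-number leader list with an
// inline head entry plus a singly linked overflow chain, so the common
// one-leader case needs no extra allocation.
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

using Value = std::string; // hypothetical stand-ins for llvm::Value /
using Block = std::string; // llvm::BasicBlock

struct LeaderMap {
  struct Entry { const Value *Val = nullptr; const Block *BB = nullptr; };
  struct Node { Entry E; Node *Next = nullptr; };
  std::unordered_map<uint32_t, Node> NumToLeaders;

  void insert(uint32_t N, const Value *V, const Block *BB) {
    Node &Head = NumToLeaders[N];
    if (!Head.E.Val) { Head.E = {V, BB}; return; } // inline slot is free
    Head.Next = new Node{{V, BB}, Head.Next};      // else chain a node
  }

  void erase(uint32_t N, const Value *V, const Block *BB) {
    Node *Prev = nullptr, *Cur = &NumToLeaders[N];
    while (Cur && (Cur->E.Val != V || Cur->E.BB != BB)) {
      Prev = Cur;
      Cur = Cur->Next;
    }
    if (!Cur)
      return;
    if (Prev) {              // unlink an overflow node
      Prev->Next = Cur->Next;
      delete Cur;
    } else if (!Cur->Next) { // head with no chain: just clear it
      Cur->E = {};
    } else {                 // head with a chain: pull the next entry in
      Node *Next = Cur->Next;
      Cur->E = Next->E;
      Cur->Next = Next->Next;
      delete Next;
    }
  }
};

int main() {
  Value A = "a", B = "b";
  Block BB1 = "bb1", BB2 = "bb2";
  LeaderMap LM;
  LM.insert(7, &A, &BB1);
  LM.insert(7, &B, &BB2);
  LM.erase(7, &A, &BB1);
  std::cout << *LM.NumToLeaders[7].E.Val << "\n"; // b
}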
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index b564f00eb9d1..b5333c532280 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -238,18 +238,6 @@ public:
const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
};
-static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
- static const unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_access_group};
- combineMetadata(ReplInst, I, KnownIDs, true);
-}
-
// This pass hoists common computations across branches sharing common
// dominator. The primary goal is to reduce the code size, and in some
// cases reduce critical path (by exposing more ILP).
@@ -951,6 +939,14 @@ void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
OtherGep = cast<GetElementPtrInst>(
cast<StoreInst>(OtherInst)->getPointerOperand());
ClonedGep->andIRFlags(OtherGep);
+
+ // Merge debug locations of GEPs, because the hoisted GEP replaces those
+ // in branches. When cloning, ClonedGep preserves the debug location of
+ // Gep, so Gep is skipped to avoid merging it twice.
+ if (OtherGep != Gep) {
+ ClonedGep->applyMergedLocation(ClonedGep->getDebugLoc(),
+ OtherGep->getDebugLoc());
+ }
}
// Replace uses of Gep with ClonedGep in Repl.
@@ -988,8 +984,8 @@ unsigned GVNHoist::rauw(const SmallVecInsn &Candidates, Instruction *Repl,
MSSAUpdater->removeMemoryAccess(OldMA);
}
+ combineMetadataForCSE(Repl, I, true);
Repl->andIRFlags(I);
- combineKnownMetadata(Repl, I);
I->replaceAllUsesWith(Repl);
// Also invalidate the Alias Analysis cache.
MD->removeInstruction(I);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 2b38831139a5..3dfa2dd9df27 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -132,7 +132,7 @@ public:
ActiveBlocks.remove(BB);
continue;
}
- Insts.push_back(BB->getTerminator()->getPrevNode());
+ Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
}
if (Insts.empty())
Fail = true;
@@ -168,7 +168,7 @@ public:
if (Inst == &Inst->getParent()->front())
ActiveBlocks.remove(Inst->getParent());
else
- NewInsts.push_back(Inst->getPrevNode());
+ NewInsts.push_back(Inst->getPrevNonDebugInstruction());
}
if (NewInsts.empty()) {
Fail = true;
@@ -226,12 +226,22 @@ class ModelledPHI {
public:
ModelledPHI() = default;
- ModelledPHI(const PHINode *PN) {
- // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
- SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
+ ModelledPHI(const PHINode *PN,
+ const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
+ // BasicBlock comes first so we sort by basic block order. There is no
+ // need to call `verifyModelledPHI`, as the Values and Blocks are
+ // populated in a deterministic order.
+ using OpsType = std::pair<BasicBlock *, Value *>;
+ SmallVector<OpsType, 4> Ops;
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
- llvm::sort(Ops);
+
+ auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
+ return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
+ };
+ // Sort in a deterministic order.
+ llvm::sort(Ops, ComesBefore);
+
for (auto &P : Ops) {
Blocks.push_back(P.first);
Values.push_back(P.second);
@@ -247,16 +257,38 @@ public:
return M;
}
+ void
+ verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
+ assert(Values.size() > 1 && Blocks.size() > 1 &&
+ "Modelling PHI with less than 2 values");
+ auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
+ const BasicBlock *BB2) {
+ return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
+ };
+ assert(llvm::is_sorted(Blocks, ComesBefore));
+ int C = 0;
+ for (const Value *V : Values) {
+ if (!isa<UndefValue>(V)) {
+ assert(cast<Instruction>(V)->getParent() == Blocks[C]);
+ (void)C;
+ }
+ C++;
+ }
+ }
/// Create a PHI from an array of incoming values and incoming blocks.
- template <typename VArray, typename BArray>
- ModelledPHI(const VArray &V, const BArray &B) {
+ ModelledPHI(SmallVectorImpl<Instruction *> &V,
+ SmallSetVector<BasicBlock *, 4> &B,
+ const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
+ // The order of Values and Blocks are already ordered by the caller.
llvm::copy(V, std::back_inserter(Values));
llvm::copy(B, std::back_inserter(Blocks));
+ verifyModelledPHI(BlockOrder);
}
/// Create a PHI from [I[OpNum] for I in Insts].
- template <typename BArray>
- ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
+ /// TODO: Figure out a way to verifyModelledPHI in this constructor.
+ ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
+ SmallSetVector<BasicBlock *, 4> &B) {
llvm::copy(B, std::back_inserter(Blocks));
for (auto *I : Insts)
Values.push_back(I->getOperand(OpNum));
@@ -297,7 +329,8 @@ public:
// Hash functor
unsigned hash() const {
- return (unsigned)hash_combine_range(Values.begin(), Values.end());
+ // The hash is deterministic because Values are saved in a specific order.
+ return (unsigned)hash_combine_range(Values.begin(), Values.end());
}
bool operator==(const ModelledPHI &Other) const {
@@ -566,7 +599,7 @@ public:
class GVNSink {
public:
- GVNSink() = default;
+ GVNSink() {}
bool run(Function &F) {
LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
@@ -575,6 +608,16 @@ public:
unsigned NumSunk = 0;
ReversePostOrderTraversal<Function*> RPOT(&F);
VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
+ // Populate reverse post-order to order basic blocks in a deterministic
+ // order. Any arbitrary ordering will work in this case as long as it is
+ // deterministic. The node ordering of newly created basic blocks
+ // is irrelevant because RPOT (for computing sinkable candidates) is also
+ // obtained ahead of time and only its order is relevant for this pass.
+ unsigned NodeOrdering = 0;
+ RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
+ for (auto *BB : RPOT)
+ if (!pred_empty(BB))
+ RPOTOrder[BB] = ++NodeOrdering;
for (auto *N : RPOT)
NumSunk += sinkBB(N);
@@ -583,6 +626,7 @@ public:
private:
ValueTable VN;
+ DenseMap<const BasicBlock *, unsigned> RPOTOrder;
bool shouldAvoidSinkingInstruction(Instruction *I) {
// These instructions may change or break semantics if moved.
@@ -603,7 +647,7 @@ private:
void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
SmallPtrSetImpl<Value *> &PHIContents) {
for (PHINode &PN : BB->phis()) {
- auto MPHI = ModelledPHI(&PN);
+ auto MPHI = ModelledPHI(&PN, RPOTOrder);
PHIs.insert(MPHI);
for (auto *V : MPHI.getValues())
PHIContents.insert(V);
@@ -655,8 +699,7 @@ GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
return std::nullopt;
VNums[N]++;
}
- unsigned VNumToSink =
- std::max_element(VNums.begin(), VNums.end(), llvm::less_second())->first;
+ unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;
if (VNums[VNumToSink] == 1)
// Can't sink anything!
@@ -692,7 +735,7 @@ GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
}
// The sunk instruction's results.
- ModelledPHI NewPHI(NewInsts, ActivePreds);
+ ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
// Does sinking this instruction render previous PHIs redundant?
if (NeededPHIs.erase(NewPHI))
@@ -720,12 +763,11 @@ GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
// try and continue making progress.
Instruction *I0 = NewInsts[0];
- // If all instructions that are going to participate don't have the same
- // number of operands, we can't do any useful PHI analysis for all operands.
- auto hasDifferentNumOperands = [&I0](Instruction *I) {
- return I->getNumOperands() != I0->getNumOperands();
+ auto isNotSameOperation = [&I0](Instruction *I) {
+ return !I0->isSameOperationAs(I);
};
- if (any_of(NewInsts, hasDifferentNumOperands))
+
+ if (any_of(NewInsts, isNotSameOperation))
return std::nullopt;
for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
@@ -767,6 +809,9 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
SmallVector<BasicBlock *, 4> Preds;
for (auto *B : predecessors(BBEnd)) {
+ // Bail out on basic blocks without predecessors (PR42346).
+ if (!RPOTOrder.count(B))
+ return 0;
auto *T = B->getTerminator();
if (isa<BranchInst>(T) || isa<SwitchInst>(T))
Preds.push_back(B);
@@ -775,7 +820,11 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
}
if (Preds.size() < 2)
return 0;
- llvm::sort(Preds);
+ auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
+ return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
+ };
+ // Sort in a deterministic order.
+ llvm::sort(Preds, ComesBefore);
unsigned NumOrigPreds = Preds.size();
// We can only sink instructions through unconditional branches.
@@ -834,7 +883,7 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
BasicBlock *BBEnd) {
SmallVector<Instruction *, 4> Insts;
for (BasicBlock *BB : Blocks)
- Insts.push_back(BB->getTerminator()->getPrevNode());
+ Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
Instruction *I0 = Insts.front();
SmallVector<Value *, 4> NewOperands;
@@ -872,8 +921,10 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
}
for (auto *I : Insts)
- if (I != I0)
+ if (I != I0) {
I->replaceAllUsesWith(I0);
+ I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
+ }
foldPointlessPHINodes(BBEnd);
// Finally nuke all instructions apart from the common instruction.
@@ -890,5 +941,6 @@ PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
GVNSink G;
if (!G.run(F))
return PreservedAnalyses::all();
+
return PreservedAnalyses::none();
}
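The GVNSink changes replace pointer-order sorting with an order map filled in during the initial reverse post-order walk, so PHI modelling and predecessor sorting no longer depend on allocation addresses. A standalone sketch of the idea, with a hypothetical Block type:

// Standalone sketch (not LLVM code): sort by a precomputed traversal index
// (as the new RPOTOrder map does) instead of by pointer value. Pointer
// order can differ from run to run; the traversal index cannot.
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct Block { std::string Name; };

int main() {
  // Pretend this is the order a reverse post-order traversal produced.
  Block Entry{"entry"}, Left{"left"}, Right{"right"}, Merge{"merge"};
  std::vector<Block *> RPOT = {&Entry, &Left, &Right, &Merge};

  std::unordered_map<const Block *, unsigned> Order;
  unsigned N = 0;
  for (Block *B : RPOT)
    Order[B] = ++N;

  // Incoming (block, value) pairs in arbitrary order, as a PHI might list.
  std::vector<std::pair<Block *, std::string>> Ops = {
      {&Right, "y"}, {&Left, "x"}};

  // Deterministic: depends only on the traversal order, not on addresses.
  std::sort(Ops.begin(), Ops.end(), [&](const auto &A, const auto &B) {
    return Order.at(A.first) < Order.at(B.first);
  });

  for (auto &[B, V] : Ops)
    std::cout << B->Name << " -> " << V << "\n"; // left -> x, right -> y
}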
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 3bbf6642a90c..e7ff2a14469c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -52,6 +52,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -121,12 +122,13 @@ static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
/// condition should stay invariant. Otherwise there can be a miscompile, like
/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
/// safest way to do it is to expand the new condition at WC's block.
-static Instruction *findInsertionPointForWideCondition(Instruction *WCOrGuard) {
+static std::optional<BasicBlock::iterator>
+findInsertionPointForWideCondition(Instruction *WCOrGuard) {
if (isGuard(WCOrGuard))
- return WCOrGuard;
+ return WCOrGuard->getIterator();
if (auto WC = extractWidenableCondition(WCOrGuard))
- return cast<Instruction>(WC);
- return nullptr;
+ return cast<Instruction>(WC)->getIterator();
+ return std::nullopt;
}
class GuardWideningImpl {
@@ -182,30 +184,30 @@ class GuardWideningImpl {
/// into \p WideningPoint.
WideningScore computeWideningScore(Instruction *DominatedInstr,
Instruction *ToWiden,
- Instruction *WideningPoint,
+ BasicBlock::iterator WideningPoint,
SmallVectorImpl<Value *> &ChecksToHoist,
SmallVectorImpl<Value *> &ChecksToWiden);
/// Helper to check if \p V can be hoisted to \p InsertPos.
- bool canBeHoistedTo(const Value *V, const Instruction *InsertPos) const {
+ bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const {
SmallPtrSet<const Instruction *, 8> Visited;
return canBeHoistedTo(V, InsertPos, Visited);
}
- bool canBeHoistedTo(const Value *V, const Instruction *InsertPos,
+ bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos,
SmallPtrSetImpl<const Instruction *> &Visited) const;
bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
- const Instruction *InsertPos) const {
+ BasicBlock::iterator InsertPos) const {
return all_of(Checks,
[&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
}
/// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
/// canBeHoistedTo returned true.
- void makeAvailableAt(Value *V, Instruction *InsertPos) const;
+ void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const;
void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
- Instruction *InsertPos) const {
+ BasicBlock::iterator InsertPos) const {
for (Value *V : Checks)
makeAvailableAt(V, InsertPos);
}
@@ -217,18 +219,19 @@ class GuardWideningImpl {
/// InsertPt is true then actually generate the resulting expression, make it
/// available at \p InsertPt and return it in \p Result (else no change to the
/// IR is made).
- std::optional<Value *> mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
- SmallVectorImpl<Value *> &ChecksToWiden,
- Instruction *InsertPt);
+ std::optional<Value *>
+ mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
+ SmallVectorImpl<Value *> &ChecksToWiden,
+ std::optional<BasicBlock::iterator> InsertPt);
/// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make
/// it available at InsertPt
Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
- Value *OldCondition, Instruction *InsertPt);
+ Value *OldCondition, BasicBlock::iterator InsertPt);
/// Adds freeze to Orig and push it as far as possible very aggressively.
/// Also replaces all uses of frozen instruction with frozen version.
- Value *freezeAndPush(Value *Orig, Instruction *InsertPt);
+ Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt);
/// Represents a range check of the form \c Base + \c Offset u< \c Length,
/// with the constraint that \c Length is not negative. \c CheckInst is the
@@ -294,7 +297,7 @@ class GuardWideningImpl {
/// for the price of computing only one of the set of expressions?
bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
SmallVectorImpl<Value *> &ChecksToWiden) {
- return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/nullptr)
+ return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt)
.has_value();
}
@@ -302,11 +305,11 @@ class GuardWideningImpl {
void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
SmallVectorImpl<Value *> &ChecksToWiden,
Instruction *ToWiden) {
- Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden);
+ auto InsertPt = findInsertionPointForWideCondition(ToWiden);
auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
Value *Result = MergedCheck ? *MergedCheck
: hoistChecks(ChecksToHoist,
- getCondition(ToWiden), InsertPt);
+ getCondition(ToWiden), *InsertPt);
if (isGuardAsWidenableBranch(ToWiden)) {
setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
@@ -417,12 +420,12 @@ bool GuardWideningImpl::eliminateInstrViaWidening(
assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");
for (auto *Candidate : make_range(I, E)) {
- auto *WideningPoint = findInsertionPointForWideCondition(Candidate);
+ auto WideningPoint = findInsertionPointForWideCondition(Candidate);
if (!WideningPoint)
continue;
SmallVector<Value *> CandidateChecks;
parseWidenableGuard(Candidate, CandidateChecks);
- auto Score = computeWideningScore(Instr, Candidate, WideningPoint,
+ auto Score = computeWideningScore(Instr, Candidate, *WideningPoint,
ChecksToHoist, CandidateChecks);
LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
<< " is " << scoreTypeToString(Score) << "\n");
@@ -456,7 +459,7 @@ bool GuardWideningImpl::eliminateInstrViaWidening(
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
Instruction *DominatedInstr, Instruction *ToWiden,
- Instruction *WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
+ BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
SmallVectorImpl<Value *> &ChecksToWiden) {
Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent());
@@ -559,7 +562,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
}
bool GuardWideningImpl::canBeHoistedTo(
- const Value *V, const Instruction *Loc,
+ const Value *V, BasicBlock::iterator Loc,
SmallPtrSetImpl<const Instruction *> &Visited) const {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
@@ -580,7 +583,8 @@ bool GuardWideningImpl::canBeHoistedTo(
[&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
}
-void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
+void GuardWideningImpl::makeAvailableAt(Value *V,
+ BasicBlock::iterator Loc) const {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst || DT.dominates(Inst, Loc))
return;
@@ -592,7 +596,7 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
for (Value *Op : Inst->operands())
makeAvailableAt(Op, Loc);
- Inst->moveBefore(Loc);
+ Inst->moveBefore(*Loc->getParent(), Loc);
}
// Return Instruction before which we can insert freeze for the value V as close
@@ -621,14 +625,15 @@ getFreezeInsertPt(Value *V, const DominatorTree &DT) {
return Res;
}
-Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
+Value *GuardWideningImpl::freezeAndPush(Value *Orig,
+ BasicBlock::iterator InsertPt) {
if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
return Orig;
std::optional<BasicBlock::iterator> InsertPtAtDef =
getFreezeInsertPt(Orig, DT);
if (!InsertPtAtDef) {
FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
- FI->insertBefore(InsertPt);
+ FI->insertBefore(*InsertPt->getParent(), InsertPt);
return FI;
}
if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) {
@@ -695,7 +700,7 @@ Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
Worklist.push_back(U.get());
}
for (Instruction *I : DropPoisonFlags)
- I->dropPoisonGeneratingFlagsAndMetadata();
+ I->dropPoisonGeneratingAnnotations();
Value *Result = Orig;
for (Value *V : NeedFreeze) {
@@ -715,7 +720,7 @@ Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
SmallVectorImpl<Value *> &ChecksToWiden,
- Instruction *InsertPt) {
+ std::optional<BasicBlock::iterator> InsertPt) {
using namespace llvm::PatternMatch;
Value *Result = nullptr;
@@ -747,10 +752,10 @@ GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
if (InsertPt) {
ConstantInt *NewRHS =
- ConstantInt::get(InsertPt->getContext(), NewRHSAP);
- assert(canBeHoistedTo(LHS, InsertPt) && "must be");
- makeAvailableAt(LHS, InsertPt);
- Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ ConstantInt::get((*InsertPt)->getContext(), NewRHSAP);
+ assert(canBeHoistedTo(LHS, *InsertPt) && "must be");
+ makeAvailableAt(LHS, *InsertPt);
+ Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk");
}
return Result;
}
@@ -765,16 +770,16 @@ GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
combineRangeChecks(Checks, CombinedChecks)) {
if (InsertPt) {
for (auto &RC : CombinedChecks) {
- makeAvailableAt(RC.getCheckInst(), InsertPt);
+ makeAvailableAt(RC.getCheckInst(), *InsertPt);
if (Result)
Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
- InsertPt);
+ *InsertPt);
else
Result = RC.getCheckInst();
}
assert(Result && "Failed to find result value");
Result->setName("wide.chk");
- Result = freezeAndPush(Result, InsertPt);
+ Result = freezeAndPush(Result, *InsertPt);
}
return Result;
}
@@ -786,9 +791,9 @@ GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
Value *OldCondition,
- Instruction *InsertPt) {
+ BasicBlock::iterator InsertPt) {
assert(!ChecksToHoist.empty());
- IRBuilder<> Builder(InsertPt);
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
makeAvailableAt(ChecksToHoist, InsertPt);
makeAvailableAt(OldCondition, InsertPt);
Value *Result = Builder.CreateAnd(ChecksToHoist);
@@ -812,7 +817,7 @@ bool GuardWideningImpl::parseRangeChecks(
if (IC->getPredicate() == ICmpInst::ICMP_UGT)
std::swap(CmpLHS, CmpRHS);
- auto &DL = IC->getModule()->getDataLayout();
+ auto &DL = IC->getDataLayout();
GuardWideningImpl::RangeCheck Check(
CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
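
The GuardWidening hunks above thread insertion points as BasicBlock::iterator (wrapped in std::optional where no valid point may exist) instead of raw Instruction pointers, so that moved and newly created instructions land correctly relative to debug records; they also switch to the renamed dropPoisonGeneratingAnnotations. A minimal stand-alone sketch of the iterator-based pattern, with helper names that are illustrative and not part of the patch:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Move Inst to the position denoted by InsertPt; the iterator form keeps
    // debug records at the target position in the right order.
    static void hoistTo(Instruction *Inst, BasicBlock::iterator InsertPt) {
      Inst->moveBefore(*InsertPt->getParent(), InsertPt);
    }

    // Materialize a frozen copy of V immediately before InsertPt.
    static Value *freezeAt(Value *V, BasicBlock::iterator InsertPt) {
      auto *FI = new FreezeInst(V, V->getName() + ".fr");
      FI->insertBefore(*InsertPt->getParent(), InsertPt);
      return FI;
    }
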
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 41c4d6236173..5e2131b0b180 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -70,6 +70,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -137,6 +138,8 @@ class IndVarSimplify {
SmallVector<WeakTrackingVH, 16> DeadInsts;
bool WidenIndVars;
+ bool RunUnswitching = false;
+
bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
@@ -170,6 +173,8 @@ public:
}
bool run(Loop *L);
+
+ bool runUnswitching() const { return RunUnswitching; }
};
} // end anonymous namespace
@@ -350,18 +355,22 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
// Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ PHINode *NewPHI =
+ PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
PN->getIncomingBlock(IncomingEdge));
+ NewPHI->setDebugLoc(PN->getDebugLoc());
- Value *NewAdd =
- BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
- Incr->getName()+".int", Incr);
+ Instruction *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName() + ".int", Incr->getIterator());
+ NewAdd->setDebugLoc(Incr->getDebugLoc());
NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
- ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
- ConstantInt::get(Int32Ty, ExitValue),
- Compare->getName());
+ ICmpInst *NewCompare =
+ new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
+ NewCompare->setDebugLoc(Compare->getDebugLoc());
// In the following deletions, PN may become dead and may be deleted.
// Use a WeakTrackingVH to observe whether this happens.
@@ -385,8 +394,9 @@ bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
// We give preference to sitofp over uitofp because it is faster on most
// platforms.
if (WeakPH) {
- Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- &*PN->getParent()->getFirstInsertionPt());
+ Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstInsertionPt());
+ Conv->setDebugLoc(PN->getDebugLoc());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
}
@@ -508,7 +518,7 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
- if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
+ if (!Cast->getDataLayout().isLegalInteger(Width))
return;
// Check that `Cast` actually extends the induction variable (we rely on this
@@ -614,9 +624,11 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
// Information about sign/zero extensions of CurrIV.
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
- Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
- &Visitor);
+ const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
+ Rewriter, &Visitor);
+ Changed |= C;
+ RunUnswitching |= U;
if (Visitor.WI.WidestNativeType) {
WideIVs.push_back(Visitor.WI);
}
@@ -833,7 +845,7 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "Must be in simplified form");
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = L->getHeader()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
@@ -1220,7 +1232,7 @@ static void replaceLoopPHINodesWithPreheaderValues(
if (!L->contains(I))
continue;
- Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
+ Value *Res = simplifyInstruction(I, I->getDataLayout());
if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
for (User *U : I->users())
Worklist.push_back(cast<Instruction>(U));
@@ -1451,7 +1463,7 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
continue;
- const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const DataLayout &DL = ExitingBB->getDataLayout();
const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
auto FullCR = ConstantRange::getFull(InnerBitWidth);
@@ -1516,9 +1528,9 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
// loop varying work to loop-invariant work.
auto doRotateTransform = [&]() {
assert(ICmp->isUnsigned() && "must have proven unsigned already");
- auto *NewRHS =
- CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
- L->getLoopPreheader()->getTerminator());
+ auto *NewRHS = CastInst::Create(
+ Instruction::Trunc, RHS, LHSOp->getType(), "",
+ L->getLoopPreheader()->getTerminator()->getIterator());
ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
if (LHS->use_empty())
@@ -1526,7 +1538,7 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
};
- const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const DataLayout &DL = ExitingBB->getDataLayout();
const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
auto FullCR = ConstantRange::getFull(InnerBitWidth);
@@ -1873,6 +1885,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (OldCond->use_empty())
DeadInsts.emplace_back(OldCond);
Changed = true;
+ RunUnswitching = true;
}
return Changed;
@@ -2049,7 +2062,7 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
Function *F = L.getHeader()->getParent();
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = F->getDataLayout();
IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
WidenIndVars && AllowIVWidening);
@@ -2058,6 +2071,11 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
+ if (IVS.runUnswitching()) {
+ AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
+ PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
+ }
+
if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
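
The IndVarSimplify hunks pair each iterator-based CreateAdd/ICmpInst/SIToFPInst with an explicit setDebugLoc copied from the instruction being replaced, and record RunUnswitching so the loop pass manager schedules an extra SimpleLoopUnswitch run through the ShouldRunExtraSimpleLoopUnswitch marker. A minimal sketch of the create-then-copy-DebugLoc pattern (helper name is illustrative, not the pass's own):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static Instruction *makeIntIncrement(PHINode *NewPHI, Instruction *OldIncr,
                                         int64_t Step) {
      IntegerType *Int32Ty = Type::getInt32Ty(OldIncr->getContext());
      // Insert via the iterator overload, as the patch above does.
      Instruction *NewAdd = BinaryOperator::CreateAdd(
          NewPHI, ConstantInt::get(Int32Ty, Step, /*IsSigned=*/true),
          OldIncr->getName() + ".int", OldIncr->getIterator());
      // Carry over the source location of the instruction being replaced.
      NewAdd->setDebugLoc(OldIncr->getDebugLoc());
      return NewAdd;
    }
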
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 1bf50d79e533..c9be8ee00cdc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -642,6 +642,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(I->getType(), AS);
auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
NewI->insertAfter(I);
+ NewI->setDebugLoc(I->getDebugLoc());
return NewI;
}
@@ -821,7 +822,7 @@ unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
}
bool InferAddressSpacesImpl::run(Function &F) {
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getDataLayout();
if (AssumeDefaultIsFlatAddressSpace)
FlatAddrSpace = 0;
@@ -1221,6 +1222,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
Value::use_iterator I, E, Next;
for (I = V->use_begin(), E = V->use_end(); I != E;) {
Use &U = *I;
+ User *CurUser = U.getUser();
// Some users may see the same pointer operand in multiple operands. Skip
// to the next instruction.
@@ -1231,11 +1233,10 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
// If V is used as the pointer operand of a compatible memory operation,
// sets the pointer operand to NewV. This replacement does not change
// the element type, so the resultant load/store is still valid.
- U.set(NewV);
+ CurUser->replaceUsesOfWith(V, NewV);
continue;
}
- User *CurUser = U.getUser();
// Skip if the current user is the new value itself.
if (CurUser == NewV)
continue;
@@ -1311,10 +1312,13 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
while (isa<PHINode>(InsertPos))
++InsertPos;
- U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
+ // This instruction may contain multiple uses of V, update them all.
+ CurUser->replaceUsesOfWith(
+ V, new AddrSpaceCastInst(NewV, V->getType(), "", InsertPos));
} else {
- U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
- V->getType()));
+ CurUser->replaceUsesOfWith(
+ V, ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+ V->getType()));
}
}
}
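
In the InferAddressSpaces hunks, per-use U.set(NewV) is replaced with CurUser->replaceUsesOfWith(V, NewV) because a single user can reference the same pointer in several operands, and all of them must switch to the new address-space value together. A tiny sketch of the difference (names are illustrative):

    #include "llvm/IR/User.h"
    #include "llvm/IR/Value.h"

    using namespace llvm;

    static void rewritePointerOperands(User *CurUser, Value *OldPtr,
                                       Value *NewPtr) {
      // Updates every operand of CurUser that equals OldPtr, whereas
      // Use::set() would rewrite only the single operand slot in hand.
      CurUser->replaceUsesOfWith(OldPtr, NewPtr);
    }
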
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index b75b8d486fbb..6e0c206bd198 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -48,7 +48,7 @@ static bool tryToImproveAlign(
}
bool inferAlignment(Function &F, AssumptionCache &AC, DominatorTree &DT) {
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
bool Changed = false;
// Enforce preferred type alignment if possible. We do this as a separate
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index ee9452ce1c7d..326849a4eb39 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -99,7 +99,7 @@ struct InstSimplifyLegacyPass : public FunctionPass {
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
const SimplifyQuery SQ(DL, TLI, DT, AC);
return runImpl(F, SQ);
}
@@ -125,7 +125,7 @@ PreservedAnalyses InstSimplifyPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
bool Changed = runImpl(F, SQ);
if (!Changed)
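
The one-line cleanups in InferAlignment and InstSimplify (and in several files above) switch to the DataLayout convenience accessors on Function, BasicBlock and Instruction. A small usage sketch under the assumption of a fixed-size type, with an illustrative helper name:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    static bool fitsInNativeInteger(const Function &F, Type *Ty) {
      // Shorthand for F.getParent()->getDataLayout().
      const DataLayout &DL = F.getDataLayout();
      return DL.isLegalInteger(DL.getTypeSizeInBits(Ty).getFixedValue());
    }
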
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
new file mode 100644
index 000000000000..2a4f68e12525
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -0,0 +1,190 @@
+//===- JumpTableToSwitch.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned>
+ JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
+ cl::desc("Only split jump tables with size less or "
+ "equal than JumpTableSizeThreshold."),
+ cl::init(10));
+
+// TODO: Consider adding a cost model for profitability analysis of this
+// transformation. Currently we replace a jump table with a switch if all the
+// functions in the jump table are smaller than the provided threshold.
+static cl::opt<unsigned> FunctionSizeThreshold(
+ "jump-table-to-switch-function-size-threshold", cl::Hidden,
+ cl::desc("Only split jump tables containing functions whose sizes are less "
+ "or equal than this threshold."),
+ cl::init(50));
+
+#define DEBUG_TYPE "jump-table-to-switch"
+
+namespace {
+struct JumpTableTy {
+ Value *Index;
+ SmallVector<Function *, 10> Funcs;
+};
+} // anonymous namespace
+
+static std::optional<JumpTableTy> parseJumpTable(GetElementPtrInst *GEP,
+ PointerType *PtrTy) {
+ Constant *Ptr = dyn_cast<Constant>(GEP->getPointerOperand());
+ if (!Ptr)
+ return std::nullopt;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return std::nullopt;
+
+ Function &F = *GEP->getParent()->getParent();
+ const DataLayout &DL = F.getDataLayout();
+ const unsigned BitWidth =
+ DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(BitWidth, 0);
+ if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
+ return std::nullopt;
+ if (VariableOffsets.size() != 1)
+ return std::nullopt;
+ // TODO: consider supporting more general patterns
+ if (!ConstantOffset.isZero())
+ return std::nullopt;
+ APInt StrideBytes = VariableOffsets.front().second;
+ const uint64_t JumpTableSizeBytes = DL.getTypeAllocSize(GV->getValueType());
+ if (JumpTableSizeBytes % StrideBytes.getZExtValue() != 0)
+ return std::nullopt;
+ const uint64_t N = JumpTableSizeBytes / StrideBytes.getZExtValue();
+ if (N > JumpTableSizeThreshold)
+ return std::nullopt;
+
+ JumpTableTy JumpTable;
+ JumpTable.Index = VariableOffsets.front().first;
+ JumpTable.Funcs.reserve(N);
+ for (uint64_t Index = 0; Index < N; ++Index) {
+ // ConstantOffset is zero.
+ APInt Offset = Index * StrideBytes;
+ Constant *C =
+ ConstantFoldLoadFromConst(GV->getInitializer(), PtrTy, Offset, DL);
+ auto *Func = dyn_cast_or_null<Function>(C);
+ if (!Func || Func->isDeclaration() ||
+ Func->getInstructionCount() > FunctionSizeThreshold)
+ return std::nullopt;
+ JumpTable.Funcs.push_back(Func);
+ }
+ return JumpTable;
+}
+
+static BasicBlock *expandToSwitch(CallBase *CB, const JumpTableTy &JT,
+ DomTreeUpdater &DTU,
+ OptimizationRemarkEmitter &ORE) {
+ const bool IsVoid = CB->getType() == Type::getVoidTy(CB->getContext());
+
+ SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+ BasicBlock *BB = CB->getParent();
+ BasicBlock *Tail = SplitBlock(BB, CB, &DTU, nullptr, nullptr,
+ BB->getName() + Twine(".tail"));
+ DTUpdates.push_back({DominatorTree::Delete, BB, Tail});
+ BB->getTerminator()->eraseFromParent();
+
+ Function &F = *BB->getParent();
+ BasicBlock *BBUnreachable = BasicBlock::Create(
+ F.getContext(), "default.switch.case.unreachable", &F, Tail);
+ IRBuilder<> BuilderUnreachable(BBUnreachable);
+ BuilderUnreachable.CreateUnreachable();
+
+ IRBuilder<> Builder(BB);
+ SwitchInst *Switch = Builder.CreateSwitch(JT.Index, BBUnreachable);
+ DTUpdates.push_back({DominatorTree::Insert, BB, BBUnreachable});
+
+ IRBuilder<> BuilderTail(CB);
+ PHINode *PHI =
+ IsVoid ? nullptr : BuilderTail.CreatePHI(CB->getType(), JT.Funcs.size());
+
+ for (auto [Index, Func] : llvm::enumerate(JT.Funcs)) {
+ BasicBlock *B = BasicBlock::Create(Func->getContext(),
+ "call." + Twine(Index), &F, Tail);
+ DTUpdates.push_back({DominatorTree::Insert, BB, B});
+ DTUpdates.push_back({DominatorTree::Insert, B, Tail});
+
+ CallBase *Call = cast<CallBase>(CB->clone());
+ Call->setCalledFunction(Func);
+ Call->insertInto(B, B->end());
+ Switch->addCase(
+ cast<ConstantInt>(ConstantInt::get(JT.Index->getType(), Index)), B);
+ BranchInst::Create(Tail, B);
+ if (PHI)
+ PHI->addIncoming(Call, B);
+ }
+ DTU.applyUpdates(DTUpdates);
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ReplacedJumpTableWithSwitch", CB)
+ << "expanded indirect call into switch";
+ });
+ if (PHI)
+ CB->replaceAllUsesWith(PHI);
+ CB->eraseFromParent();
+ return Tail;
+}
+
+PreservedAnalyses JumpTableToSwitchPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OptimizationRemarkEmitter &ORE =
+ AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ DominatorTree *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ PostDominatorTree *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
+ bool Changed = false;
+ for (BasicBlock &BB : make_early_inc_range(F)) {
+ BasicBlock *CurrentBB = &BB;
+ while (CurrentBB) {
+ BasicBlock *SplittedOutTail = nullptr;
+ for (Instruction &I : make_early_inc_range(*CurrentBB)) {
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call || Call->getCalledFunction() || Call->isMustTailCall())
+ continue;
+ auto *L = dyn_cast<LoadInst>(Call->getCalledOperand());
+ // Skip atomic or volatile loads.
+ if (!L || !L->isSimple())
+ continue;
+ auto *GEP = dyn_cast<GetElementPtrInst>(L->getPointerOperand());
+ if (!GEP)
+ continue;
+ auto *PtrTy = dyn_cast<PointerType>(L->getType());
+ assert(PtrTy && "call operand must be a pointer");
+ std::optional<JumpTableTy> JumpTable = parseJumpTable(GEP, PtrTy);
+ if (!JumpTable)
+ continue;
+ SplittedOutTail = expandToSwitch(Call, *JumpTable, DTU, ORE);
+ Changed = true;
+ break;
+ }
+ CurrentBB = SplittedOutTail ? SplittedOutTail : nullptr;
+ }
+ }
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ if (DT)
+ PA.preserve<DominatorTreeAnalysis>();
+ if (PDT)
+ PA.preserve<PostDominatorTreeAnalysis>();
+ return PA;
+}
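
For the new JumpTableToSwitch pass above, a plain C++ analogy (not pass code; names invented) of the pattern it looks for, an indirect call through a small constant table of function pointers, and of the switch of direct calls it produces; the default case is unreachable because the table index is assumed in-bounds:

    static int slow_case(int x) { return x + 1; }
    static int fast_case(int x) { return x * 2; }

    using Handler = int (*)(int);
    static const Handler kTable[2] = {slow_case, fast_case};

    // Before: load a function pointer from the constant table, call it.
    int dispatch_indirect(unsigned idx, int x) {
      return kTable[idx](x);
    }

    // After (conceptually): one direct call per table slot behind a switch,
    // which later passes can analyze or inline per callee.
    int dispatch_switch(unsigned idx, int x) {
      switch (idx) {
      case 0: return slow_case(x);
      case 1: return fast_case(x);
      default: __builtin_unreachable(); // GCC/Clang builtin
      }
    }
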
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 87c01ead634f..7a0b661a0779 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -231,7 +231,7 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
Weights[0] = BP.getCompl().getNumerator();
Weights[1] = BP.getNumerator();
}
- setBranchWeights(*PredBr, Weights);
+ setBranchWeights(*PredBr, Weights, hasBranchWeightOrigin(*PredBr));
}
}
@@ -401,8 +401,8 @@ static bool replaceFoldableUses(Instruction *Cond, Value *ToVal,
Changed |= replaceNonLocalUsesWith(Cond, ToVal);
for (Instruction &I : reverse(*KnownAtEndOfBB)) {
// Replace any debug-info record users of Cond with ToVal.
- for (DPValue &DPV : I.getDbgValueRange())
- DPV.replaceVariableLocationOp(Cond, ToVal, true);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ DVR.replaceVariableLocationOp(Cond, ToVal, true);
// Reached the Cond whose uses we are trying to replace, so there are no
// more uses.
@@ -558,9 +558,9 @@ static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
/// This returns true if there were any known values.
bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
Value *V, BasicBlock *BB, PredValueInfo &Result,
- ConstantPreference Preference, DenseSet<Value *> &RecursionSet,
+ ConstantPreference Preference, SmallPtrSet<Value *, 4> &RecursionSet,
Instruction *CxtI) {
- const DataLayout &DL = BB->getModule()->getDataLayout();
+ const DataLayout &DL = BB->getDataLayout();
// This method walks up use-def chains recursively. Because of this, we could
// get into an infinite loop going around loops in the use-def chain. To
@@ -596,11 +596,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
CmpInst::Predicate Pred;
Value *Val;
Constant *Cst;
- if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst)))) {
- auto Res = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
- if (Res != LazyValueInfo::Unknown)
- PredCst = ConstantInt::getBool(V->getContext(), Res);
- }
+ if (!PredCst && match(V, m_Cmp(Pred, m_Value(Val), m_Constant(Cst))))
+ PredCst = LVI->getPredicateOnEdge(Pred, Val, Cst, P, BB, CxtI);
if (Constant *KC = getKnownConstant(PredCst, Preference))
Result.emplace_back(KC, P);
}
@@ -757,7 +754,7 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
// may result in comparison of values from two different loop iterations.
// FIXME: This check is broken if LoopHeaders is not populated.
if (PN && PN->getParent() == BB && !LoopHeaders.contains(BB)) {
- const DataLayout &DL = PN->getModule()->getDataLayout();
+ const DataLayout &DL = PN->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -780,13 +777,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
if (LHSInst && LHSInst->getParent() == BB)
continue;
- LazyValueInfo::Tristate
- ResT = LVI->getPredicateOnEdge(Pred, LHS,
- cast<Constant>(RHS), PredBB, BB,
- CxtI ? CxtI : Cmp);
- if (ResT == LazyValueInfo::Unknown)
- continue;
- Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ Res = LVI->getPredicateOnEdge(Pred, LHS, cast<Constant>(RHS), PredBB,
+ BB, CxtI ? CxtI : Cmp);
}
if (Constant *KC = getKnownConstant(Res, WantInteger))
@@ -806,14 +798,10 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
- LazyValueInfo::Tristate Res =
- LVI->getPredicateOnEdge(Pred, CmpLHS,
- CmpConst, P, BB, CxtI ? CxtI : Cmp);
- if (Res == LazyValueInfo::Unknown)
- continue;
-
- Constant *ResC = ConstantInt::get(CmpType, Res);
- Result.emplace_back(ResC, P);
+ Constant *Res = LVI->getPredicateOnEdge(Pred, CmpLHS, CmpConst, P, BB,
+ CxtI ? CxtI : Cmp);
+ if (Constant *KC = getKnownConstant(Res, WantInteger))
+ Result.emplace_back(KC, P);
}
return !Result.empty();
@@ -868,7 +856,8 @@ bool JumpThreadingPass::computeValueKnownInPredecessorsImpl(
for (const auto &LHSVal : LHSVals) {
Constant *V = LHSVal.first;
- Constant *Folded = ConstantExpr::getCompare(Pred, V, CmpConst);
+ Constant *Folded =
+ ConstantFoldCompareInstOperands(Pred, V, CmpConst, DL);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
Result.emplace_back(KC, LHSVal.second);
}
@@ -1007,7 +996,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
Value *SimpleVal =
- ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
+ ConstantFoldInstruction(I, BB->getDataLayout(), TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
if (isInstructionTriviallyDead(I, TLI))
@@ -1037,7 +1026,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << " In block '" << BB->getName()
<< "' folding undef terminator: " << *BBTerm << '\n');
- BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
+ Instruction *NewBI = BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm->getIterator());
+ NewBI->setDebugLoc(BBTerm->getDebugLoc());
++NumFolds;
BBTerm->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
@@ -1080,11 +1070,11 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// it's value at the branch instruction. We only handle comparisons
// against a constant at this time.
if (Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1))) {
- LazyValueInfo::Tristate Ret =
+ Constant *Res =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, BB->getTerminator(),
/*UseBlockValue=*/false);
- if (Ret != LazyValueInfo::Unknown) {
+ if (Res) {
// We can safely replace *some* uses of the CondInst if it has
// exactly one value as returned by LVI. RAUW is incorrect in the
// presence of guards and assumes, that have the `Cond` as the use. This
@@ -1092,10 +1082,7 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
// at the end of block, but RAUW unconditionally replaces all uses
// including the guards/assumes themselves and the uses before the
// guard/assume.
- auto *CI = Ret == LazyValueInfo::True ?
- ConstantInt::getTrue(CondCmp->getType()) :
- ConstantInt::getFalse(CondCmp->getType());
- if (replaceFoldableUses(CondCmp, CI, BB))
+ if (replaceFoldableUses(CondCmp, Res, BB))
return true;
}
@@ -1177,7 +1164,7 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
BasicBlock *CurrentPred = BB->getSinglePredecessor();
unsigned Iter = 0;
- auto &DL = BB->getModule()->getDataLayout();
+ auto &DL = BB->getDataLayout();
while (CurrentPred && Iter++ < ImplicationSearchThreshold) {
auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator());
@@ -1202,7 +1189,7 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
RemoveSucc->removePredecessor(BB);
- BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
+ BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI->getIterator());
UncondBI->setDebugLoc(BI->getDebugLoc());
++NumFolds;
BI->eraseFromParent();
@@ -1278,9 +1265,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// only happen in dead loops.
if (AvailableVal == LoadI)
AvailableVal = PoisonValue::get(LoadI->getType());
- if (AvailableVal->getType() != LoadI->getType())
+ if (AvailableVal->getType() != LoadI->getType()) {
AvailableVal = CastInst::CreateBitOrPointerCast(
- AvailableVal, LoadI->getType(), "", LoadI);
+ AvailableVal, LoadI->getType(), "", LoadI->getIterator());
+ cast<Instruction>(AvailableVal)->setDebugLoc(LoadI->getDebugLoc());
+ }
LoadI->replaceAllUsesWith(AvailableVal);
LoadI->eraseFromParent();
return true;
@@ -1321,7 +1310,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// If this is a load on a phi pointer, phi-translate it and search
// for available load/store to the pointer in predecessors.
Type *AccessTy = LoadI->getType();
- const auto &DL = LoadI->getModule()->getDataLayout();
+ const auto &DL = LoadI->getDataLayout();
MemoryLocation Loc(LoadedPtr->DoPHITranslation(LoadBB, PredBB),
LocationSize::precise(DL.getTypeStoreSize(AccessTy)),
AATags);
@@ -1421,7 +1410,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
LoadI->getName() + ".pr", false, LoadI->getAlign(),
LoadI->getOrdering(), LoadI->getSyncScopeID(),
- UnavailablePred->getTerminator());
+ UnavailablePred->getTerminator()->getIterator());
NewVal->setDebugLoc(LoadI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
@@ -1434,16 +1423,14 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
array_pod_sort(AvailablePreds.begin(), AvailablePreds.end());
// Create a PHI node at the start of the block for the PRE'd load value.
- pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PHINode *PN = PHINode::Create(LoadI->getType(), std::distance(PB, PE), "");
+ PHINode *PN = PHINode::Create(LoadI->getType(), pred_size(LoadBB), "");
PN->insertBefore(LoadBB->begin());
PN->takeName(LoadI);
PN->setDebugLoc(LoadI->getDebugLoc());
// Insert new entries into the PHI for each predecessor. A single block may
// have multiple entries here.
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(LoadBB)) {
AvailablePredsTy::iterator I =
llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
@@ -1456,8 +1443,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// predecessor use the same bitcast.
Value *&PredV = I->second;
if (PredV->getType() != LoadI->getType())
- PredV = CastInst::CreateBitOrPointerCast(PredV, LoadI->getType(), "",
- P->getTerminator());
+ PredV = CastInst::CreateBitOrPointerCast(
+ PredV, LoadI->getType(), "", P->getTerminator()->getIterator());
PN->addIncoming(PredV, I->first);
}
@@ -1490,7 +1477,7 @@ findMostPopularDest(BasicBlock *BB,
// Populate DestPopularity with the successors in the order they appear in the
// successor list. This way, we ensure determinism by iterating it in the
- // same order in std::max_element below. We map nullptr to 0 so that we can
+ // same order in llvm::max_element below. We map nullptr to 0 so that we can
// return nullptr when PredToDestList contains nullptr only.
DestPopularity[nullptr] = 0;
for (auto *SuccBB : successors(BB))
@@ -1501,8 +1488,7 @@ findMostPopularDest(BasicBlock *BB,
DestPopularity[PredToDest.second]++;
// Find the most popular dest.
- auto MostPopular = std::max_element(
- DestPopularity.begin(), DestPopularity.end(), llvm::less_second());
+ auto MostPopular = llvm::max_element(DestPopularity, llvm::less_second());
// Okay, we have finally picked the most popular destination.
return MostPopular->first;
@@ -1512,7 +1498,8 @@ findMostPopularDest(BasicBlock *BB,
// BB->getSinglePredecessor() and then on to BB.
Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
BasicBlock *PredPredBB,
- Value *V) {
+ Value *V,
+ const DataLayout &DL) {
BasicBlock *PredBB = BB->getSinglePredecessor();
assert(PredBB && "Expected a single predecessor");
@@ -1537,11 +1524,12 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
if (CondCmp->getParent() == BB) {
Constant *Op0 =
- evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
+ evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);
Constant *Op1 =
- evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
+ evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);
if (Op0 && Op1) {
- return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
+ return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
+ Op1, DL);
}
}
return nullptr;
@@ -1655,7 +1643,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
// Finally update the terminator.
Instruction *Term = BB->getTerminator();
- BranchInst::Create(OnlyDest, Term);
+ Instruction *NewBI = BranchInst::Create(OnlyDest, Term->getIterator());
+ NewBI->setDebugLoc(Term->getDebugLoc());
++NumFolds;
Term->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
@@ -1879,7 +1868,7 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
BasicBlock *OldPred,
BasicBlock *NewPred,
- DenseMap<Instruction*, Value*> &ValueMap) {
+ ValueToValueMapTy &ValueMap) {
for (PHINode &PN : PHIBB->phis()) {
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
@@ -1887,7 +1876,7 @@ static void addPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
- DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
+ ValueToValueMapTy::iterator I = ValueMap.find(Inst);
if (I != ValueMap.end())
IV = I->second;
}
@@ -1948,9 +1937,8 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
/// Update the SSA form. NewBB contains instructions that are copied from BB.
/// ValueMapping maps old values in BB to new ones in NewBB.
-void JumpThreadingPass::updateSSA(
- BasicBlock *BB, BasicBlock *NewBB,
- DenseMap<Instruction *, Value *> &ValueMapping) {
+void JumpThreadingPass::updateSSA(BasicBlock *BB, BasicBlock *NewBB,
+ ValueToValueMapTy &ValueMapping) {
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
// the cloned value, or some PHI derived value. This can require arbitrary
@@ -1958,7 +1946,7 @@ void JumpThreadingPass::updateSSA(
SSAUpdater SSAUpdate;
SmallVector<Use *, 16> UsesToRename;
SmallVector<DbgValueInst *, 4> DbgValues;
- SmallVector<DPValue *, 4> DPValues;
+ SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
@@ -1975,16 +1963,16 @@ void JumpThreadingPass::updateSSA(
}
// Find debug values outside of the block
- findDbgValues(DbgValues, &I, &DPValues);
+ findDbgValues(DbgValues, &I, &DbgVariableRecords);
llvm::erase_if(DbgValues, [&](const DbgValueInst *DbgVal) {
return DbgVal->getParent() == BB;
});
- llvm::erase_if(DPValues, [&](const DPValue *DPVal) {
- return DPVal->getParent() == BB;
+ llvm::erase_if(DbgVariableRecords, [&](const DbgVariableRecord *DbgVarRec) {
+ return DbgVarRec->getParent() == BB;
});
// If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty() && DbgValues.empty() && DPValues.empty())
+ if (UsesToRename.empty() && DbgValues.empty() && DbgVariableRecords.empty())
continue;
LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
@@ -1997,11 +1985,11 @@ void JumpThreadingPass::updateSSA(
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
- if (!DbgValues.empty() || !DPValues.empty()) {
+ if (!DbgValues.empty() || !DbgVariableRecords.empty()) {
SSAUpdate.UpdateDebugValues(&I, DbgValues);
- SSAUpdate.UpdateDebugValues(&I, DPValues);
+ SSAUpdate.UpdateDebugValues(&I, DbgVariableRecords);
DbgValues.clear();
- DPValues.clear();
+ DbgVariableRecords.clear();
}
LLVM_DEBUG(dbgs() << "\n");
@@ -2011,14 +1999,15 @@ void JumpThreadingPass::updateSSA(
/// Clone instructions in range [BI, BE) to NewBB. For PHI nodes, we only clone
/// arguments that come from PredBB. Return the map from the variables in the
/// source basic block to the variables in the newly created basic block.
-DenseMap<Instruction *, Value *>
-JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
- BasicBlock::iterator BE, BasicBlock *NewBB,
- BasicBlock *PredBB) {
+
+void JumpThreadingPass::cloneInstructions(ValueToValueMapTy &ValueMapping,
+ BasicBlock::iterator BI,
+ BasicBlock::iterator BE,
+ BasicBlock *NewBB,
+ BasicBlock *PredBB) {
// We are going to have to map operands from the source basic block to the new
// copy of the block 'NewBB'. If there are PHI nodes in the source basic
// block, evaluate them to account for entry from PredBB.
- DenseMap<Instruction *, Value *> ValueMapping;
// Retargets llvm.dbg.value to any renamed variables.
auto RetargetDbgValueIfPossible = [&](Instruction *NewInst) -> bool {
@@ -2044,11 +2033,11 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
return true;
};
- // Duplicate implementation of the above dbg.value code, using DPValues
- // instead.
- auto RetargetDPValueIfPossible = [&](DPValue *DPV) {
+ // Duplicate implementation of the above dbg.value code, using
+ // DbgVariableRecords instead.
+ auto RetargetDbgVariableRecordIfPossible = [&](DbgVariableRecord *DVR) {
SmallSet<std::pair<Value *, Value *>, 16> OperandsToRemap;
- for (auto *Op : DPV->location_ops()) {
+ for (auto *Op : DVR->location_ops()) {
Instruction *OpInst = dyn_cast<Instruction>(Op);
if (!OpInst)
continue;
@@ -2059,7 +2048,7 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
}
for (auto &[OldOp, MappedOp] : OperandsToRemap)
- DPV->replaceVariableLocationOp(OldOp, MappedOp);
+ DVR->replaceVariableLocationOp(OldOp, MappedOp);
};
BasicBlock *RangeBB = BI->getParent();
@@ -2083,9 +2072,9 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
cloneNoAliasScopes(NoAliasScopes, ClonedScopes, "thread", Context);
auto CloneAndRemapDbgInfo = [&](Instruction *NewInst, Instruction *From) {
- auto DPVRange = NewInst->cloneDebugInfoFrom(From);
- for (DPValue &DPV : DPVRange)
- RetargetDPValueIfPossible(&DPV);
+ auto DVRRange = NewInst->cloneDebugInfoFrom(From);
+ for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
+ RetargetDbgVariableRecordIfPossible(&DVR);
};
// Clone the non-phi instructions of the source basic block into NewBB,
@@ -2106,24 +2095,24 @@ JumpThreadingPass::cloneInstructions(BasicBlock::iterator BI,
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
- DenseMap<Instruction *, Value *>::iterator I = ValueMapping.find(Inst);
+ ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
}
- // There may be DPValues on the terminator, clone directly from marker
- // to marker as there isn't an instruction there.
- if (BE != RangeBB->end() && BE->hasDbgValues()) {
+ // There may be DbgVariableRecords on the terminator, clone directly from
+ // marker to marker as there isn't an instruction there.
+ if (BE != RangeBB->end() && BE->hasDbgRecords()) {
// Dump them at the end.
- DPMarker *Marker = RangeBB->getMarker(BE);
- DPMarker *EndMarker = NewBB->createMarker(NewBB->end());
- auto DPVRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
- for (DPValue &DPV : DPVRange)
- RetargetDPValueIfPossible(&DPV);
+ DbgMarker *Marker = RangeBB->getMarker(BE);
+ DbgMarker *EndMarker = NewBB->createMarker(NewBB->end());
+ auto DVRRange = EndMarker->cloneDebugInfoFrom(Marker, std::nullopt);
+ for (DbgVariableRecord &DVR : filterDbgVars(DVRRange))
+ RetargetDbgVariableRecordIfPossible(&DVR);
}
- return ValueMapping;
+ return;
}
/// Attempt to thread through two successive basic blocks.
@@ -2194,12 +2183,13 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
unsigned OneCount = 0;
BasicBlock *ZeroPred = nullptr;
BasicBlock *OnePred = nullptr;
+ const DataLayout &DL = BB->getDataLayout();
for (BasicBlock *P : predecessors(PredBB)) {
// If PredPred ends with IndirectBrInst, we can't handle it.
if (isa<IndirectBrInst>(P->getTerminator()))
continue;
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
- evaluateOnPredecessorEdge(BB, P, Cond))) {
+ evaluateOnPredecessorEdge(BB, P, Cond, DL))) {
if (CI->isZero()) {
ZeroCount++;
ZeroPred = P;
@@ -2298,8 +2288,9 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
// to account for entry from PredPredBB.
- DenseMap<Instruction *, Value *> ValueMapping =
- cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
+ ValueToValueMapTy ValueMapping;
+ cloneInstructions(ValueMapping, PredBB->begin(), PredBB->end(), NewBB,
+ PredPredBB);
// Copy the edge probabilities from PredBB to NewBB.
if (BPI)
@@ -2422,8 +2413,9 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
}
// Copy all the instructions from BB to NewBB except the terminator.
- DenseMap<Instruction *, Value *> ValueMapping =
- cloneInstructions(BB->begin(), std::prev(BB->end()), NewBB, PredBB);
+ ValueToValueMapTy ValueMapping;
+ cloneInstructions(ValueMapping, BB->begin(), std::prev(BB->end()), NewBB,
+ PredBB);
// We didn't copy the terminator from BB over to NewBB, because there is now
// an unconditional jump to SuccBB. Insert the unconditional jump.
@@ -2555,8 +2547,7 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
BBSuccFreq.push_back(SuccFreq.getFrequency());
}
- uint64_t MaxBBSuccFreq =
- *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end());
+ uint64_t MaxBBSuccFreq = *llvm::max_element(BBSuccFreq);
SmallVector<BranchProbability, 4> BBSuccProbs;
if (MaxBBSuccFreq == 0)
@@ -2614,7 +2605,7 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
Weights.push_back(Prob.getNumerator());
auto TI = BB->getTerminator();
- setBranchWeights(*TI, Weights);
+ setBranchWeights(*TI, Weights, hasBranchWeightOrigin(*TI));
}
}
@@ -2679,7 +2670,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB.
- DenseMap<Instruction*, Value*> ValueMapping;
+ ValueToValueMapTy ValueMapping;
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
@@ -2693,17 +2684,20 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
- DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
+ ValueToValueMapTy::iterator I = ValueMapping.find(Inst);
if (I != ValueMapping.end())
New->setOperand(i, I->second);
}
+ // Remap debug variable operands.
+ remapDebugVariable(ValueMapping, New);
+
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
if (Value *IV = simplifyInstruction(
New,
- {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
+ {BB->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
if (!New->mayHaveSideEffects()) {
New->eraseFromParent();
@@ -2882,15 +2876,13 @@ bool JumpThreadingPass::tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
// Now check if one of the select values would allow us to constant fold the
// terminator in BB. We don't do the transform if both sides fold, those
// cases will be threaded in any case.
- LazyValueInfo::Tristate LHSFolds =
+ Constant *LHSRes =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(1),
CondRHS, Pred, BB, CondCmp);
- LazyValueInfo::Tristate RHSFolds =
+ Constant *RHSRes =
LVI->getPredicateOnEdge(CondCmp->getPredicate(), SI->getOperand(2),
CondRHS, Pred, BB, CondCmp);
- if ((LHSFolds != LazyValueInfo::Unknown ||
- RHSFolds != LazyValueInfo::Unknown) &&
- LHSFolds != RHSFolds) {
+ if ((LHSRes || RHSRes) && LHSRes != RHSRes) {
unfoldSelectInstr(Pred, BB, SI, CondLHS, I);
return true;
}
@@ -2973,15 +2965,16 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) {
// Expand the select.
Value *Cond = SI->getCondition();
if (!isGuaranteedNotToBeUndefOrPoison(Cond, nullptr, SI))
- Cond = new FreezeInst(Cond, "cond.fr", SI);
+ Cond = new FreezeInst(Cond, "cond.fr", SI->getIterator());
MDNode *BranchWeights = getBranchWeightMDNode(*SI);
Instruction *Term =
SplitBlockAndInsertIfThen(Cond, SI, false, BranchWeights);
BasicBlock *SplitBB = SI->getParent();
BasicBlock *NewBB = Term->getParent();
- PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+ PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI->getIterator());
NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
NewPN->addIncoming(SI->getFalseValue(), BB);
+ NewPN->setDebugLoc(SI->getDebugLoc());
SI->replaceAllUsesWith(NewPN);
SI->eraseFromParent();
// NewBB and SplitBB are newly created blocks which require insertion.
@@ -3063,7 +3056,7 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
- auto &DL = BB->getModule()->getDataLayout();
+ auto &DL = BB->getDataLayout();
bool TrueDestIsSafe = false;
bool FalseDestIsSafe = false;
@@ -3119,10 +3112,11 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
+ NewPN->setDebugLoc(Inst->getDebugLoc());
NewPN->insertBefore(InsertionPoint);
Inst->replaceAllUsesWith(NewPN);
}
- Inst->dropDbgValues();
+ Inst->dropDbgRecords();
Inst->eraseFromParent();
}
return true;
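
The JumpThreading hunks move the LazyValueInfo predicate queries from the old Tristate result to a Constant* result where nullptr means "unknown", and switch the per-block clone maps from DenseMap<Instruction*, Value*> to ValueToValueMapTy. A minimal sketch of the new query pattern (context objects assumed, helper name illustrative):

    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constant.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    static bool foldsToTrueOnEdge(LazyValueInfo &LVI, CmpInst *Cmp,
                                  Constant *RHS, BasicBlock *Pred,
                                  BasicBlock *BB) {
      Constant *Res = LVI.getPredicateOnEdge(Cmp->getPredicate(),
                                             Cmp->getOperand(0), RHS, Pred, BB,
                                             /*CxtI=*/Cmp);
      // Previously a Tristate was compared against LazyValueInfo::True; now a
      // null result encodes "unknown" and a constant is the folded value.
      return Res && Res->isOneValue();
    }
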
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index f3e40a5cb809..fe264503dee9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -110,6 +110,11 @@ STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
"and hoisted out of the loop");
STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
"reassociated and hoisted out of the loop");
+STATISTIC(NumIntAssociationsHoisted,
+ "Number of invariant int expressions "
+ "reassociated and hoisted out of the loop");
+STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
+ "reassociated and hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -135,6 +140,12 @@ static cl::opt<unsigned> FPAssociationUpperLimit(
"Set upper limit for the number of transformations performed "
"during a single round of hoisting the reassociated expressions."));
+cl::opt<unsigned> IntAssociationUpperLimit(
+ "licm-max-num-int-reassociations", cl::init(5U), cl::Hidden,
+ cl::desc(
+ "Set upper limit for the number of transformations performed "
+ "during a single round of hoisting the reassociated expressions."));
+
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. LICM calls MemorySSAWalker's
@@ -924,12 +935,14 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
ReciprocalDivisor->insertBefore(&I);
+ ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
auto Product =
BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
Product->setFastMathFlags(I.getFastMathFlags());
SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
+ Product->setDebugLoc(I.getDebugLoc());
I.replaceAllUsesWith(Product);
eraseInstruction(I, *SafetyInfo, MSSAU);
@@ -1041,7 +1054,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
Value *Addr = LI->getPointerOperand();
- const DataLayout &DL = LI->getModule()->getDataLayout();
+ const DataLayout &DL = LI->getDataLayout();
const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
// It is not currently possible for clang to generate an invariant.start
@@ -1208,6 +1221,14 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (CI->isConvergent())
return false;
+ // FIXME: Current LLVM IR semantics don't work well with coroutines and
+ // thread local globals. We currently treat getting the address of a thread
+ // local global as not accessing memory, even though it may not be a
+ // constant throughout a function with coroutines. Remove this check after
+ // we better model semantics of thread local globals.
+ if (CI->getFunction()->isPresplitCoroutine())
+ return false;
+
using namespace PatternMatch;
if (match(CI, m_Intrinsic<Intrinsic::assume>()))
// Assumes don't actually alias anything or throw
@@ -1216,14 +1237,6 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Handle simple cases by querying alias analysis.
MemoryEffects Behavior = AA->getMemoryEffects(CI);
- // FIXME: we don't handle the semantics of thread local well. So that the
- // address of thread locals are fake constants in coroutines. So We forbid
- // to treat onlyReadsMemory call in coroutines as constants now. Note that
- // it is possible to hide a thread local access in a onlyReadsMemory call.
- // Remove this check after we handle the semantics of thread locals well.
- if (Behavior.onlyReadsMemory() && CI->getFunction()->isPresplitCoroutine())
- return false;
-
if (Behavior.doesNotAccessMemory())
return true;
if (Behavior.onlyReadsMemory()) {
@@ -1442,6 +1455,7 @@ static Instruction *cloneInstructionInExitBlock(
}
New = CallInst::Create(CI, OpBundles);
+ New->copyMetadata(*CI);
} else {
New = I.clone();
}
@@ -2031,7 +2045,7 @@ bool llvm::promoteLoopAccessesToScalars(
bool SawNotAtomic = false;
AAMDNodes AATags;
- const DataLayout &MDL = Preheader->getModule()->getDataLayout();
+ const DataLayout &MDL = Preheader->getDataLayout();
// If there are reads outside the promoted set, then promoting stores is
// definitely not safe.
@@ -2225,7 +2239,7 @@ bool llvm::promoteLoopAccessesToScalars(
if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
PreheaderLoad =
new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
- Preheader->getTerminator());
+ Preheader->getTerminator()->getIterator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
@@ -2494,7 +2508,7 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
// The swapped GEPs are inbounds if both original GEPs are inbounds
// and the sign of the offsets is the same. For simplicity, only
// handle both offsets being non-negative.
- const DataLayout &DL = GEP->getModule()->getDataLayout();
+ const DataLayout &DL = GEP->getDataLayout();
auto NonNegative = [&](Value *V) {
return isKnownNonNegative(V, SimplifyQuery(DL, DT, AC, GEP));
};
@@ -2544,7 +2558,7 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
// freely move values from left side of inequality to right side (just as in
// normal linear arithmetics). Overflows make things much more complicated, so
// we want to avoid this.
- auto &DL = L.getHeader()->getModule()->getDataLayout();
+ auto &DL = L.getHeader()->getDataLayout();
bool ProvedNoOverflowAfterReassociate =
computeOverflowForSignedSub(InvariantRHS, InvariantOp,
SimplifyQuery(DL, DT, AC, &ICmp)) ==
@@ -2597,7 +2611,7 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
// normal linear arithmetics). Overflows make things much more complicated, so
// we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
// "C1 - C2" does not overflow.
- auto &DL = L.getHeader()->getModule()->getDataLayout();
+ auto &DL = L.getHeader()->getDataLayout();
SimplifyQuery SQ(DL, DT, AC, &ICmp);
if (VariantSubtracted) {
// C1 - LV < C2 --> LV > C1 - C2
@@ -2661,21 +2675,29 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
return false;
}
+static bool isReassociableOp(Instruction *I, unsigned IntOpcode,
+ unsigned FPOpcode) {
+ if (I->getOpcode() == IntOpcode)
+ return true;
+ if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
+ I->hasNoSignedZeros())
+ return true;
+ return false;
+}
+
/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
/// A1, A2, ... and C are loop invariants into expressions like
/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
/// invariant expressions. This functions returns true only if any hoisting has
/// actually occured.
-static bool hoistFPAssociation(Instruction &I, Loop &L,
- ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU, AssumptionCache *AC,
- DominatorTree *DT) {
- using namespace PatternMatch;
- Value *VariantOp = nullptr, *InvariantOp = nullptr;
-
- if (!match(&I, m_FMul(m_Value(VariantOp), m_Value(InvariantOp))) ||
- !I.hasAllowReassoc() || !I.hasNoSignedZeros())
+static bool hoistMulAddAssociation(Instruction &I, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (!isReassociableOp(&I, Instruction::Mul, Instruction::FMul))
return false;
+ Value *VariantOp = I.getOperand(0);
+ Value *InvariantOp = I.getOperand(1);
if (L.isLoopInvariant(VariantOp))
std::swap(VariantOp, InvariantOp);
if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
@@ -2684,20 +2706,24 @@ static bool hoistFPAssociation(Instruction &I, Loop &L,
// First, we need to make sure we should do the transformation.
SmallVector<Use *> Changes;
+ SmallVector<BinaryOperator *> Adds;
SmallVector<BinaryOperator *> Worklist;
if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
Worklist.push_back(VariantBinOp);
while (!Worklist.empty()) {
BinaryOperator *BO = Worklist.pop_back_val();
- if (!BO->hasOneUse() || !BO->hasAllowReassoc() || !BO->hasNoSignedZeros())
+ if (!BO->hasOneUse())
return false;
- BinaryOperator *Op0, *Op1;
- if (match(BO, m_FAdd(m_BinOp(Op0), m_BinOp(Op1)))) {
- Worklist.push_back(Op0);
- Worklist.push_back(Op1);
+ if (isReassociableOp(BO, Instruction::Add, Instruction::FAdd) &&
+ isa<BinaryOperator>(BO->getOperand(0)) &&
+ isa<BinaryOperator>(BO->getOperand(1))) {
+ Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
+ Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
+ Adds.push_back(BO);
continue;
}
- if (BO->getOpcode() != Instruction::FMul || L.isLoopInvariant(BO))
+ if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
+ L.isLoopInvariant(BO))
return false;
Use &U0 = BO->getOperandUse(0);
Use &U1 = BO->getOperandUse(1);
@@ -2707,26 +2733,108 @@ static bool hoistFPAssociation(Instruction &I, Loop &L,
Changes.push_back(&U1);
else
return false;
- if (Changes.size() > FPAssociationUpperLimit)
+ unsigned Limit = I.getType()->isIntOrIntVectorTy()
+ ? IntAssociationUpperLimit
+ : FPAssociationUpperLimit;
+ if (Changes.size() > Limit)
return false;
}
if (Changes.empty())
return false;
+ // Drop the poison flags for any adds we looked through.
+ if (I.getType()->isIntOrIntVectorTy()) {
+ for (auto *Add : Adds)
+ Add->dropPoisonGeneratingFlags();
+ }
+
// We know we should do it so let's do the transformation.
auto *Preheader = L.getLoopPreheader();
assert(Preheader && "Loop is not in simplify form?");
IRBuilder<> Builder(Preheader->getTerminator());
for (auto *U : Changes) {
assert(L.isLoopInvariant(U->get()));
- Instruction *Ins = cast<Instruction>(U->getUser());
- U->set(Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul"));
+ auto *Ins = cast<BinaryOperator>(U->getUser());
+ Value *Mul;
+ if (I.getType()->isIntOrIntVectorTy()) {
+ Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
+ // Drop the poison flags on the original multiply.
+ Ins->dropPoisonGeneratingFlags();
+ } else
+ Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");
+
+ // Rewrite the reassociable instruction.
+ unsigned OpIdx = U->getOperandNo();
+ auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(0);
+ auto *RHS = OpIdx == 1 ? Mul : Ins->getOperand(1);
+ auto *NewBO = BinaryOperator::Create(Ins->getOpcode(), LHS, RHS,
+ Ins->getName() + ".reass", Ins);
+ NewBO->copyIRFlags(Ins);
+ if (VariantOp == Ins)
+ VariantOp = NewBO;
+ Ins->replaceAllUsesWith(NewBO);
+ eraseInstruction(*Ins, SafetyInfo, MSSAU);
}
+
I.replaceAllUsesWith(VariantOp);
eraseInstruction(I, SafetyInfo, MSSAU);
return true;
}
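
Purely as orientation for this hunk (not part of the patch): a minimal C++ sketch of the source-level shape that hoistMulAddAssociation now handles for both the integer and the FP case; names are illustrative, and the integer rewrite is only equal modulo wrap flags, which is why the patch drops poison-generating flags on the adds and multiplies it looks through.

    // Before: C is loop invariant but is multiplied inside the loop every
    // iteration; the tree has the ((A1 * B1) + (A2 * B2)) * C shape.
    long before(long A1, long A2, long C, const long *B, int N) {
      long Sum = 0;
      for (int I = 0; I + 1 < N; ++I)
        Sum += (A1 * B[I] + A2 * B[I + 1]) * C;
      return Sum;
    }

    // After reassociation: A1*C and A2*C are loop invariant, so LICM hoists
    // them into the preheader (the "factor.op.mul" values created above).
    long after(long A1, long A2, long C, const long *B, int N) {
      long F1 = A1 * C, F2 = A2 * C; // hoisted invariant factors
      long Sum = 0;
      for (int I = 0; I + 1 < N; ++I)
        Sum += F1 * B[I] + F2 * B[I + 1];
      return Sum;
    }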
+/// Reassociate general associative binary expressions of the form
+///
+/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
+///
+/// where op is an associative binary op, LV is a loop variant, and C1 and C2
+/// are loop invariants that we want to hoist.
+///
+/// TODO: This can be extended to more cases such as
+/// 2. "C1 op (C2 op LV)" ==> "(C1 op C2) op LV"
+/// 3. "(C1 op LV) op C2" ==> "LV op (C1 op C2)" if op is commutative
+/// 4. "C1 op (LV op C2)" ==> "(C1 op C2) op LV" if op is commutative
+static bool hoistBOAssociation(Instruction &I, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(&I);
+ if (!BO || !BO->isAssociative())
+ return false;
+
+ Instruction::BinaryOps Opcode = BO->getOpcode();
+ BinaryOperator *Op0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
+
+ // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
+ if (Op0 && Op0->getOpcode() == Opcode) {
+ Value *LV = Op0->getOperand(0);
+ Value *C1 = Op0->getOperand(1);
+ Value *C2 = BO->getOperand(1);
+
+ if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
+ !L.isLoopInvariant(C2))
+ return false;
+
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
+
+ auto *NewBO =
+ BinaryOperator::Create(Opcode, LV, Inv, BO->getName() + ".reass", BO);
+ NewBO->copyIRFlags(BO);
+ BO->replaceAllUsesWith(NewBO);
+ eraseInstruction(*BO, SafetyInfo, MSSAU);
+
+ // Note: (LV op C1) might not be erased if it has more uses than the one we
+ // just replaced.
+ if (Op0->use_empty())
+ eraseInstruction(*Op0, SafetyInfo, MSSAU);
+
+ return true;
+ }
+
+ return false;
+}
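
Again illustrative only (not from the diff): a C++ sketch, with hypothetical names, of pattern 1 that hoistBOAssociation rewrites.

    // Before: two loop-invariant operands are folded into the variant value
    // one at a time inside the loop: (LV op C1) op C2.
    void before(int *Out, const int *In, int C1, int C2, int N) {
      for (int I = 0; I < N; ++I)
        Out[I] = (In[I] + C1) + C2;
    }

    // After: C1 op C2 becomes a single "invariant.op" computed once in the
    // preheader, leaving one loop-carried operation: LV op (C1 op C2).
    void after(int *Out, const int *In, int C1, int C2, int N) {
      int Inv = C1 + C2; // hoisted invariant.op
      for (int I = 0; I < N; ++I)
        Out[I] = In[I] + Inv;
    }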
+
static bool hoistArithmetics(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2754,9 +2862,19 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
return true;
}
- if (hoistFPAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ bool IsInt = I.getType()->isIntOrIntVectorTy();
+ if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ ++NumHoisted;
+ if (IsInt)
+ ++NumIntAssociationsHoisted;
+ else
+ ++NumFPAssociationsHoisted;
+ return true;
+ }
+
+ if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
++NumHoisted;
- ++NumFPAssociationsHoisted;
+ ++NumBOAssociationsHoisted;
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 9a27a08c86eb..6092cd1bc08b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -405,7 +405,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
: SE.getUMinExpr(NewBoundSCEV, SplitBoundSCEV);
SCEVExpander Expander(
- SE, L.getHeader()->getParent()->getParent()->getDataLayout(), "split");
+ SE, L.getHeader()->getDataLayout(), "split");
Instruction *InsertPt = SplitLoopPH->getTerminator();
Value *NewBoundValue =
Expander.expandCodeFor(NewBoundSCEV, NewBoundSCEV->getType(), InsertPt);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index cc1f56014eee..d85166e518f1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -391,7 +391,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
continue;
BasicBlock *BB = P.InsertPt->getParent();
- SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr");
+ SCEVExpander SCEVE(*SE, BB->getDataLayout(), "prefaddr");
const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
P.LSCEVAddRec->getStepRecurrence(*SE)));
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index bfe9374cf2f8..b0b7ae60da98 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -273,9 +273,9 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
if (LiveEdges.count({ Pred, BB })) {
HasLivePreds = true;
Value *Incoming = PN.getIncomingValueForBlock(Pred);
- // Skip undefs. If they are present, we can assume they are equal to
- // the non-undef input.
- if (isa<UndefValue>(Incoming))
+ // Skip poison. If they are present, we can assume they are equal to
+ // the non-poison input.
+ if (isa<PoisonValue>(Incoming))
continue;
// Two inputs.
if (OnlyInput && OnlyInput != Incoming)
@@ -284,8 +284,8 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
}
assert(HasLivePreds && "No live predecessors?");
- // If all incoming live value were undefs, return undef.
- return OnlyInput ? OnlyInput : UndefValue::get(PN.getType());
+ // If all incoming live values were poison, return poison.
+ return OnlyInput ? OnlyInput : PoisonValue::get(PN.getType());
};
DenseMap<Value *, Value *> FirstIterValue;
@@ -299,7 +299,7 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
// iteration, mark this successor live.
// 3b. If we cannot prove it, conservatively assume that all successors are
// live.
- auto &DL = Header->getModule()->getDataLayout();
+ auto &DL = Header->getDataLayout();
const SimplifyQuery SQ(DL);
for (auto *BB : RPOT) {
Visited.insert(BB);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 626888c74bad..c84e419c2a24 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -26,7 +26,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -120,7 +120,7 @@ namespace {
/// Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
class InstPartition {
- using InstructionSet = SmallPtrSet<Instruction *, 8>;
+ using InstructionSet = SmallSetVector<Instruction *, 8>;
public:
InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
@@ -166,7 +166,7 @@ public:
// Insert instructions from the loop that we depend on.
for (Value *V : I->operand_values()) {
auto *I = dyn_cast<Instruction>(V);
- if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+ if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
Worklist.push_back(I);
}
}
@@ -231,17 +231,16 @@ public:
}
}
- void print() const {
- if (DepCycle)
- dbgs() << " (cycle)\n";
+ void print(raw_ostream &OS) const {
+ OS << (DepCycle ? " (cycle)\n" : "\n");
for (auto *I : Set)
// Prefix with the block name.
- dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
+ OS << " " << I->getParent()->getName() << ":" << *I << "\n";
}
- void printBlocks() const {
+ void printBlocks(raw_ostream &OS) const {
for (auto *BB : getDistributedLoop()->getBlocks())
- dbgs() << *BB;
+ OS << *BB;
}
private:
@@ -368,11 +367,11 @@ public:
std::tie(LoadToPart, NewElt) =
LoadToPartition.insert(std::make_pair(Inst, PartI));
if (!NewElt) {
- LLVM_DEBUG(dbgs()
- << "Merging partitions due to this load in multiple "
- << "partitions: " << PartI << ", " << LoadToPart->second
- << "\n"
- << *Inst << "\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "LDist: Merging partitions due to this load in multiple "
+ << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
+ << *Inst << "\n");
auto PartJ = I;
do {
@@ -530,8 +529,8 @@ public:
void print(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
- OS << "Partition " << Index++ << " (" << &P << "):\n";
- P.print();
+ OS << "LDist: Partition " << Index++ << ":";
+ P.print(OS);
}
}
@@ -545,11 +544,11 @@ public:
}
#endif
- void printBlocks() const {
+ void printBlocks(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
- dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
- P.printBlocks();
+ OS << "LDist: Partition " << Index++ << ":";
+ P.printBlocks(OS);
}
}
@@ -628,7 +627,7 @@ public:
const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());
- LLVM_DEBUG(dbgs() << "Backward dependences:\n");
+ LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
for (const auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
@@ -659,9 +658,9 @@ public:
bool processLoop() {
assert(L->isInnermost() && "Only process inner loops.");
- LLVM_DEBUG(dbgs() << "\nLDist: In \""
- << L->getHeader()->getParent()->getName()
- << "\" checking " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
+ << L->getHeader()->getParent()->getName() << "' from "
+ << L->getLocStr() << "\n");
// Having a single exit block implies there's also one exiting block.
if (!L->getExitBlock())
@@ -686,6 +685,9 @@ public:
if (!Dependences || Dependences->empty())
return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
+ LLVM_DEBUG(dbgs() << "LDist: Found a candidate loop: "
+ << L->getHeader()->getName() << "\n");
+
InstPartitionContainer Partitions(L, LI, DT);
// First, go through each memory operation and assign them to consecutive
@@ -735,7 +737,7 @@ public:
for (auto *Inst : DefsUsedOutside)
Partitions.addToNewNonCyclicPartition(Inst);
- LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");
@@ -743,19 +745,19 @@ public:
// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
// should be able to vectorize these together.
Partitions.mergeBeforePopulating();
- LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");
// Now, populate the partitions with non-memory operations.
Partitions.populateUsedSet();
- LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
// In order to preserve original lexical order for loads, keep them in the
// partition that we set up in the MemoryInstructionDependences loop.
if (Partitions.mergeToAvoidDuplicatedLoads()) {
- LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+ LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
<< Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
@@ -779,7 +781,8 @@ public:
if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
return fail("HeuristicDisabled", "distribution heuristic disabled");
- LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
+ << L->getHeader()->getName() << "\n");
// We're done forming the partitions; set up the reverse mapping from
// instructions to partitions.
Partitions.setupPartitionIdOnInstructions();
@@ -807,7 +810,7 @@ public:
MDNode *OrigLoopID = L->getLoopID();
- LLVM_DEBUG(dbgs() << "\nPointers:\n");
+ LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
LVer.versionLoop(DefsUsedOutside);
@@ -830,8 +833,8 @@ public:
// Now, we remove the instructions from each loop that don't belong to that
// partition.
Partitions.removeUnusedInsts();
- LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
- LLVM_DEBUG(Partitions.printBlocks());
+ LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
+ LLVM_DEBUG(Partitions.printBlocks(dbgs()));
if (LDistVerify) {
LI->verify(*DT);
@@ -853,7 +856,7 @@ public:
LLVMContext &Ctx = F->getContext();
bool Forced = isForced().value_or(false);
- LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
+ LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
// With Rpass-missed report that distribution failed.
ORE->emit([&]() {
@@ -962,11 +965,10 @@ private:
} // end anonymous namespace
-/// Shared implementation between new and old PMs.
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
LoopAccessInfoManager &LAIs) {
- // Build up a worklist of inner-loops to vectorize. This is necessary as the
+ // Build up a worklist of inner-loops to distribute. This is necessary as the
// act of distributing a loop creates new loops and can invalidate iterators
// across the loops.
SmallVector<Loop *, 8> Worklist;
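
For context on what the relabelled "LDist:" debug output describes, here is a minimal C++ sketch (illustrative, not taken from the patch or its tests) of the distribution itself: the cyclic dependence is isolated into its own partition so the remaining partition can be vectorized.

    // Before: the recurrence on A pins the whole loop; the independent
    // D[I] computation cannot be vectorized while it shares a loop with it.
    void before(int *A, const int *B, const int *C, int *D, const int *E,
                int N) {
      for (int I = 0; I < N; ++I) {
        A[I + 1] = A[I] * B[I]; // backward (cyclic) dependence
        D[I] = C[I] * E[I];     // no unsafe dependences
      }
    }

    // After distribution (guarded by runtime checks for possible aliasing):
    // the cycle gets its own loop and the second loop becomes vectorizable.
    void after(int *A, const int *B, const int *C, int *D, const int *E,
               int N) {
      for (int I = 0; I < N; ++I)
        A[I + 1] = A[I] * B[I];
      for (int I = 0; I < N; ++I)
        D[I] = C[I] * E[I];
    }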
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 533cefaf1061..d5e91d3c1dec 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -70,6 +70,7 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <optional>
@@ -97,6 +98,10 @@ static cl::opt<bool>
cl::desc("Widen the loop induction variables, if possible, so "
"overflow checks won't reject flattening"));
+static cl::opt<bool>
+ VersionLoops("loop-flatten-version-loops", cl::Hidden, cl::init(true),
+ cl::desc("Version loops if flattened loop could overflow"));
+
namespace {
// We require all uses of both induction variables to match this pattern:
//
@@ -141,6 +146,8 @@ struct FlattenInfo {
// has been applied. Used to skip
// checks on phi nodes.
+ Value *NewTripCount = nullptr; // The tripcount of the flattened loop.
+
FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL){};
bool isNarrowInductionPhi(PHINode *Phi) {
@@ -637,7 +644,7 @@ static bool checkIVUsers(FlattenInfo &FI) {
static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
AssumptionCache *AC) {
Function *F = FI.OuterLoop->getHeader()->getParent();
- const DataLayout &DL = F->getParent()->getDataLayout();
+ const DataLayout &DL = F->getDataLayout();
// For debugging/testing.
if (AssumeNoOverflow)
@@ -752,11 +759,13 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ORE.emit(Remark);
}
- Value *NewTripCount = BinaryOperator::CreateMul(
- FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
- FI.OuterLoop->getLoopPreheader()->getTerminator());
- LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
- NewTripCount->dump());
+ if (!FI.NewTripCount) {
+ FI.NewTripCount = BinaryOperator::CreateMul(
+ FI.InnerTripCount, FI.OuterTripCount, "flatten.tripcount",
+ FI.OuterLoop->getLoopPreheader()->getTerminator()->getIterator());
+ LLVM_DEBUG(dbgs() << "Created new trip count in preheader: ";
+ FI.NewTripCount->dump());
+ }
// Fix up PHI nodes that take values from the inner loop back-edge, which
// we are about to remove.
@@ -769,13 +778,15 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
// Modify the trip count of the outer loop to be the product of the two
// trip counts.
- cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);
+ cast<User>(FI.OuterBranch->getCondition())->setOperand(1, FI.NewTripCount);
// Replace the inner loop backedge with an unconditional branch to the exit.
BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
- InnerExitingBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(InnerExitBlock, InnerExitingBlock);
+ Instruction *Term = InnerExitingBlock->getTerminator();
+ Instruction *BI = BranchInst::Create(InnerExitBlock, InnerExitingBlock);
+ BI->setDebugLoc(Term->getDebugLoc());
+ Term->eraseFromParent();
// Update the DomTree and MemorySSA.
DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());
@@ -799,8 +810,10 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
// we need to insert the new GEP where the old GEP was.
if (!DT->dominates(Base, &*Builder.GetInsertPoint()))
Builder.SetInsertPoint(cast<Instruction>(V));
- OuterValue = Builder.CreateGEP(GEP->getSourceElementType(), Base,
- OuterValue, "flatten." + V->getName());
+ OuterValue =
+ Builder.CreateGEP(GEP->getSourceElementType(), Base, OuterValue,
+ "flatten." + V->getName(),
+ GEP->isInBounds() && InnerGEP->isInBounds());
}
LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: ";
@@ -891,7 +904,8 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
const TargetTransformInfo *TTI, LPMUpdater *U,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ const LoopAccessInfo &LAI) {
LLVM_DEBUG(
dbgs() << "Loop flattening running on outer loop "
<< FI.OuterLoop->getHeader()->getName() << " and inner loop "
@@ -926,18 +940,55 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
// variable might overflow. In this case, we need to version the loop, and
// select the original version at runtime if the iteration space is too
// large.
- // TODO: We currently don't version the loop.
OverflowResult OR = checkOverflow(FI, DT, AC);
if (OR == OverflowResult::AlwaysOverflowsHigh ||
OR == OverflowResult::AlwaysOverflowsLow) {
LLVM_DEBUG(dbgs() << "Multiply would always overflow, so not profitable\n");
return false;
} else if (OR == OverflowResult::MayOverflow) {
- LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
- return false;
+ Module *M = FI.OuterLoop->getHeader()->getParent()->getParent();
+ const DataLayout &DL = M->getDataLayout();
+ if (!VersionLoops) {
+ LLVM_DEBUG(dbgs() << "Multiply might overflow, not flattening\n");
+ return false;
+ } else if (!DL.isLegalInteger(
+ FI.OuterTripCount->getType()->getScalarSizeInBits())) {
+ // If the trip count type isn't legal then it won't be possible to check
+ // for overflow using only a single multiply instruction, so don't
+ // flatten.
+ LLVM_DEBUG(
+ dbgs() << "Can't check overflow efficiently, not flattening\n");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "Multiply might overflow, versioning loop\n");
+
+ // Version the loop. The overflow check isn't a runtime pointer check, so we
+ // pass an empty list of runtime pointer checks, causing LoopVersioning to
+ // emit 'false' as the branch condition, and add our own check afterwards.
+ BasicBlock *CheckBlock = FI.OuterLoop->getLoopPreheader();
+ ArrayRef<RuntimePointerCheck> Checks(nullptr, nullptr);
+ LoopVersioning LVer(LAI, Checks, FI.OuterLoop, LI, DT, SE);
+ LVer.versionLoop();
+
+ // Check for overflow by calculating the new tripcount using
+ // umul_with_overflow and then checking if it overflowed.
+ BranchInst *Br = cast<BranchInst>(CheckBlock->getTerminator());
+ assert(Br->isConditional() &&
+ "Expected LoopVersioning to generate a conditional branch");
+ assert(match(Br->getCondition(), m_Zero()) &&
+ "Expected branch condition to be false");
+ IRBuilder<> Builder(Br);
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow,
+ FI.OuterTripCount->getType());
+ Value *Call = Builder.CreateCall(F, {FI.OuterTripCount, FI.InnerTripCount},
+ "flatten.mul");
+ FI.NewTripCount = Builder.CreateExtractValue(Call, 0, "flatten.tripcount");
+ Value *Overflow = Builder.CreateExtractValue(Call, 1, "flatten.overflow");
+ Br->setCondition(Overflow);
+ } else {
+ LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
}
- LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
}
@@ -958,13 +1009,15 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
+ LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
for (Loop *InnerLoop : LN.getLoops()) {
auto *OuterLoop = InnerLoop->getParentLoop();
if (!OuterLoop)
continue;
FlattenInfo FI(OuterLoop, InnerLoop);
- Changed |= FlattenLoopPair(FI, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
- MSSAU ? &*MSSAU : nullptr);
+ Changed |=
+ FlattenLoopPair(FI, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
+ MSSAU ? &*MSSAU : nullptr, LAIM.getInfo(*OuterLoop));
}
if (!Changed)
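
To make the new versioning path concrete, a rough C++ analogue follows (not from the patch; the real transform works on IR and uses llvm.umul.with.overflow, for which the compiler builtin below merely stands in) of what flattening with overflow versioning produces.

    // Before: a nest with trip counts M and N and a linearised index.
    void before(int *A, unsigned M, unsigned N) {
      for (unsigned I = 0; I < M; ++I)
        for (unsigned J = 0; J < N; ++J)
          A[I * N + J] = 0;
    }

    // After: one loop over M*N iterations. If M*N might overflow the
    // induction type, the loop is now versioned instead of rejected: the
    // overflow bit of the multiply selects the original nest at runtime.
    void after(int *A, unsigned M, unsigned N) {
      unsigned Flat;
      if (__builtin_umul_overflow(M, N, &Flat)) { // stand-in for umul_with_overflow
        before(A, M, N); // fall back to the unflattened version
        return;
      }
      for (unsigned K = 0; K < Flat; ++K)
        A[K] = 0;
    }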
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index e0b224d5ef73..8512b2accbe7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1684,7 +1684,7 @@ private:
PHINode::Create(LCV->getType(), 2, LCPHI->getName() + ".afterFC0");
L1HeaderPHI->insertBefore(L1HeaderIP);
L1HeaderPHI->addIncoming(LCV, FC0.Latch);
- L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
+ L1HeaderPHI->addIncoming(PoisonValue::get(LCV->getType()),
FC0.ExitingBlock);
LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
@@ -2072,7 +2072,7 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
// Ensure loops are in simplified form which is a pre-requisite for loop fusion
// pass. Added only for new PM since the legacy PM has already added
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3721564890dd..0ee1afa76a82 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -22,8 +22,6 @@
//
// Future loop memory idioms to recognize:
// memcmp, strlen, etc.
-// Future floating point idioms to recognize in -ffast-math mode:
-// fpowi
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
@@ -233,12 +231,19 @@ private:
bool recognizePopcount();
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst,
PHINode *CntPhi, Value *Var);
+ bool isProfitableToInsertFFS(Intrinsic::ID IntrinID, Value *InitX,
+ bool ZeroCheck, size_t CanonicalSize);
+ bool insertFFSIfProfitable(Intrinsic::ID IntrinID, Value *InitX,
+ Instruction *DefX, PHINode *CntPhi,
+ Instruction *CntInst);
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
+ bool recognizeShiftUntilLessThan();
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
Instruction *CntInst, PHINode *CntPhi,
Value *Var, Instruction *DefX,
const DebugLoc &DL, bool ZeroCheck,
- bool IsCntPhiUsedOutsideLoop);
+ bool IsCntPhiUsedOutsideLoop,
+ bool InsertSub = false);
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
@@ -253,7 +258,7 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
if (DisableLIRP::All)
return PreservedAnalyses::all();
- const auto *DL = &L.getHeader()->getModule()->getDataLayout();
+ const auto *DL = &L.getHeader()->getDataLayout();
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
@@ -1107,7 +1112,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
GV->setAlignment(Align(16));
Value *PatternPtr = GV;
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
-
+
// Set the TBAA info if present.
if (AATags.TBAA)
NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);
@@ -1117,7 +1122,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
if (AATags.NoAlias)
NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias);
- }
+ }
NewCall->setDebugLoc(TheStore->getDebugLoc());
@@ -1484,7 +1489,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
<< CurLoop->getHeader()->getName() << "\n");
return recognizePopcount() || recognizeAndInsertFFS() ||
- recognizeShiftUntilBitTest() || recognizeShiftUntilZero();
+ recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
+ recognizeShiftUntilLessThan();
}
/// Check if the given conditional branch is based on the comparison between
@@ -1519,6 +1525,34 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
+/// Check if the given conditional branch is based on an unsigned less-than
+/// comparison between a variable and a constant, and if the comparison is false
+/// the control yields to the loop entry. If the branch matches the behaviour,
+/// the variable involved in the comparison is returned.
+static Value *matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry,
+ APInt &Threshold) {
+ if (!BI || !BI->isConditional())
+ return nullptr;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return nullptr;
+
+ ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpConst)
+ return nullptr;
+
+ BasicBlock *FalseSucc = BI->getSuccessor(1);
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+
+ if (Pred == ICmpInst::ICMP_ULT && FalseSucc == LoopEntry) {
+ Threshold = CmpConst->getValue();
+ return Cond->getOperand(0);
+ }
+
+ return nullptr;
+}
+
// Check if the recurrence variable `VarX` is in the right form to create
// the idiom. Returns the value coerced to a PHINode if so.
static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
@@ -1530,6 +1564,107 @@ static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX,
return nullptr;
}
+/// Return true if the idiom is detected in the loop.
+///
+/// Additionally:
+/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ)
+/// or nullptr if there is no such.
+/// 2) \p CntPhi is set to the corresponding phi node
+/// or nullptr if there is no such.
+/// 3) \p InitX is set to the value whose CTLZ could be used.
+/// 4) \p DefX is set to the instruction calculating Loop exit condition.
+/// 5) \p Threshold is set to the constant involved in the unsigned less-than
+/// comparison.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (x0 < 2)
+/// goto loop-exit // the precondition of the loop
+/// cnt0 = init-val
+/// do {
+/// x = phi (x0, x.next); //PhiX
+/// cnt = phi (cnt0, cnt.next)
+///
+/// cnt.next = cnt + 1;
+/// ...
+/// x.next = x >> 1; // DefX
+/// } while (x >= 4)
+/// loop-exit:
+/// \endcode
+static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL,
+ Intrinsic::ID &IntrinID,
+ Value *&InitX, Instruction *&CntInst,
+ PHINode *&CntPhi, Instruction *&DefX,
+ APInt &Threshold) {
+ BasicBlock *LoopEntry;
+
+ DefX = nullptr;
+ CntInst = nullptr;
+ CntPhi = nullptr;
+ LoopEntry = *(CurLoop->block_begin());
+
+ // step 1: Check if the loop-back branch is in desirable form.
+ if (Value *T = matchShiftULTCondition(
+ dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,
+ Threshold))
+ DefX = dyn_cast<Instruction>(T);
+ else
+ return false;
+
+ // step 2: Check the recurrence of variable X
+ if (!DefX || !isa<PHINode>(DefX))
+ return false;
+
+ PHINode *VarPhi = cast<PHINode>(DefX);
+ int Idx = VarPhi->getBasicBlockIndex(LoopEntry);
+ if (Idx == -1)
+ return false;
+
+ DefX = dyn_cast<Instruction>(VarPhi->getIncomingValue(Idx));
+ if (!DefX || DefX->getNumOperands() == 0 || DefX->getOperand(0) != VarPhi)
+ return false;
+
+ // step 3: detect instructions corresponding to "x.next = x >> 1"
+ if (DefX->getOpcode() != Instruction::LShr)
+ return false;
+
+ IntrinID = Intrinsic::ctlz;
+ ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));
+ if (!Shft || !Shft->isOne())
+ return false;
+
+ InitX = VarPhi->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+
+ // step 4: Find the instruction which counts the CTLZ: cnt.next = cnt + 1
+ // or cnt.next = cnt + -1.
+ // TODO: We can skip the step. If loop trip count is known (CTLZ),
+ // then all uses of "cnt.next" could be optimized to the trip count
+ // plus "cnt0". Currently it is not optimized.
+ // This step could be used to detect POPCNT instruction:
+ // cnt.next = cnt + (x.next & 1)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
+ continue;
+
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
+ if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
+ continue;
+
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
+ if (!Phi)
+ continue;
+
+ CntInst = &Inst;
+ CntPhi = Phi;
+ break;
+ }
+ if (!CntInst)
+ return false;
+
+ return true;
+}
+
/// Return true iff the idiom is detected in the loop.
///
/// Additionally:
@@ -1758,27 +1893,35 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
return true;
}
-/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
-/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ inserted as a new
-/// trip count returns true; otherwise, returns false.
-bool LoopIdiomRecognize::recognizeAndInsertFFS() {
- // Give up if the loop has multiple blocks or multiple backedges.
- if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
- return false;
+// Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
+// profitable if we delete the loop.
+bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,
+ Value *InitX, bool ZeroCheck,
+ size_t CanonicalSize) {
+ const Value *Args[] = {InitX,
+ ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
- Intrinsic::ID IntrinID;
- Value *InitX;
- Instruction *DefX = nullptr;
- PHINode *CntPhi = nullptr;
- Instruction *CntInst = nullptr;
- // Help decide if transformation is profitable. For ShiftUntilZero idiom,
- // this is always 6.
- size_t IdiomCanonicalSize = 6;
+ // @llvm.dbg doesn't count as they have no semantic effect.
+ auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+ uint32_t HeaderSize =
+ std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
- if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX,
- CntInst, CntPhi, DefX))
+ IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+ InstructionCost Cost = TTI->getIntrinsicInstrCost(
+ Attrs, TargetTransformInfo::TCK_SizeAndLatency);
+ if (HeaderSize != CanonicalSize && Cost > TargetTransformInfo::TCC_Basic)
return false;
+ return true;
+}
+
+/// Convert CTLZ / CTTZ idiom loop into countable loop.
+/// If CTLZ / CTTZ is inserted as a new trip count, returns true; otherwise,
+/// returns false.
+bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,
+ Value *InitX, Instruction *DefX,
+ PHINode *CntPhi,
+ Instruction *CntInst) {
bool IsCntPhiUsedOutsideLoop = false;
for (User *U : CntPhi->users())
if (!CurLoop->contains(cast<Instruction>(U))) {
@@ -1820,35 +1963,107 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
ZeroCheck = true;
}
- // Check if CTLZ / CTTZ intrinsic is profitable. Assume it is always
- // profitable if we delete the loop.
-
- // the loop has only 6 instructions:
+ // FFS idiom loop has only 6 instructions:
// %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
// %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
// %shr = ashr %n.addr.0, 1
// %tobool = icmp eq %shr, 0
// %inc = add nsw %i.0, 1
// br i1 %tobool
+ size_t IdiomCanonicalSize = 6;
+ if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+ return false;
- const Value *Args[] = {InitX,
- ConstantInt::getBool(InitX->getContext(), ZeroCheck)};
+ transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
+ DefX->getDebugLoc(), ZeroCheck,
+ IsCntPhiUsedOutsideLoop);
+ return true;
+}
- // @llvm.dbg doesn't count as they have no semantic effect.
- auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
- uint32_t HeaderSize =
- std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+/// Recognize CTLZ or CTTZ idiom in a non-countable loop and convert the loop
+/// to countable (with CTLZ / CTTZ trip count). If CTLZ / CTTZ is inserted as a
+/// new trip count, returns true; otherwise, returns false.
+bool LoopIdiomRecognize::recognizeAndInsertFFS() {
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
- IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
- InstructionCost Cost =
- TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
- if (HeaderSize != IdiomCanonicalSize &&
- Cost > TargetTransformInfo::TCC_Basic)
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+
+ if (!detectShiftUntilZeroIdiom(CurLoop, *DL, IntrinID, InitX, CntInst, CntPhi,
+ DefX))
+ return false;
+
+ return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
+}
+
+bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ Intrinsic::ID IntrinID;
+ Value *InitX;
+ Instruction *DefX = nullptr;
+ PHINode *CntPhi = nullptr;
+ Instruction *CntInst = nullptr;
+
+ APInt LoopThreshold;
+ if (!detectShiftUntilLessThanIdiom(CurLoop, *DL, IntrinID, InitX, CntInst,
+ CntPhi, DefX, LoopThreshold))
+ return false;
+
+ if (LoopThreshold == 2) {
+ // Treat as regular FFS.
+ return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);
+ }
+
+ // Look for Floor Log2 Idiom.
+ if (LoopThreshold != 4)
+ return false;
+
+ // Abort if CntPhi is used outside of the loop.
+ for (User *U : CntPhi->users())
+ if (!CurLoop->contains(cast<Instruction>(U)))
+ return false;
+
+ // It is safe to assume the Preheader exists, as it was checked in
+ // parent function RunOnLoop.
+ BasicBlock *PH = CurLoop->getLoopPreheader();
+ auto *PreCondBB = PH->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+ auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());
+ if (!PreCondBI)
+ return false;
+
+ APInt PreLoopThreshold;
+ if (matchShiftULTCondition(PreCondBI, PH, PreLoopThreshold) != InitX ||
+ PreLoopThreshold != 2)
return false;
+ bool ZeroCheck = true;
+
+ // the loop has only 6 instructions:
+ // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ]
+ // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ]
+ // %shr = ashr %n.addr.0, 1
+ // %tobool = icmp ult %n.addr.0, C
+ // %inc = add nsw %i.0, 1
+ // br i1 %tobool
+ size_t IdiomCanonicalSize = 6;
+ if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))
+ return false;
+
+ // log2(x) = w − 1 − clz(x)
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
DefX->getDebugLoc(), ZeroCheck,
- IsCntPhiUsedOutsideLoop);
+ /*IsCntPhiUsedOutsideLoop=*/false,
+ /*InsertSub=*/true);
return true;
}
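
A small, self-contained C++ illustration of the new floor-log2 path (not part of the patch; the function names and the 32-bit width are assumptions): the first function shows the loop shape detectShiftUntilLessThanIdiom matches (threshold 4 in the latch, threshold 2 in the guard), the second shows what the countable rewrite plus the extra InsertSub subtraction amounts to.

    // Loop shape recognized as floor(log2(X)): guarded by X < 2, exits when
    // the pre-shift value drops below 4, counting one shift per iteration.
    unsigned floorLog2Loop(unsigned X) {
      unsigned Cnt = 0;
      if (X < 2)                  // precondition: matchShiftULTCondition(.., 2)
        return Cnt;
      for (unsigned Cur = X;;) {  // Cur plays the role of the PHI
        ++Cnt;                    // CntInst: cnt.next = cnt + 1
        unsigned Next = Cur >> 1; // DefX:    x.next   = x >> 1
        if (Cur < 4)              // latch:   icmp ult %n.addr.0, 4
          return Cnt;             // Cnt == floor(log2(X)) for X >= 2
        Cur = Next;
      }
    }

    // Countable equivalent using log2(x) = w - 1 - clz(x); the trailing
    // "- 1" is the extra subtract emitted when InsertSub is true.
    unsigned floorLog2Ctlz(unsigned X) {
      if (X < 2)
        return 0;
      return 32u - 1u - (unsigned)__builtin_clz(X);
    }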
@@ -1963,7 +2178,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
void LoopIdiomRecognize::transformLoopToCountable(
Intrinsic::ID IntrinID, BasicBlock *Preheader, Instruction *CntInst,
PHINode *CntPhi, Value *InitX, Instruction *DefX, const DebugLoc &DL,
- bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) {
+ bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
// Step 1: Insert the CTLZ/CTTZ instruction at the end of the preheader block
@@ -1993,6 +2208,8 @@ void LoopIdiomRecognize::transformLoopToCountable(
Type *CountTy = Count->getType();
Count = Builder.CreateSub(
ConstantInt::get(CountTy, CountTy->getIntegerBitWidth()), Count);
+ if (InsertSub)
+ Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));
Value *NewCount = Count;
if (IsCntPhiUsedOutsideLoop)
Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));
@@ -2409,15 +2626,15 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) {
// BitMask may be computed from BitPos, Freeze BitPos so we can increase
// it's use count.
- Instruction *InsertPt = nullptr;
+ std::optional<BasicBlock::iterator> InsertPt = std::nullopt;
if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
- InsertPt = &**BitPosI->getInsertionPointAfterDef();
+ InsertPt = BitPosI->getInsertionPointAfterDef();
else
- InsertPt = &*DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
+ InsertPt = DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
if (!InsertPt)
return false;
FreezeInst *BitPosFrozen =
- new FreezeInst(BitPos, BitPos->getName() + ".fr", InsertPt);
+ new FreezeInst(BitPos, BitPos->getName() + ".fr", *InsertPt);
BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) {
return U.getUser() != BitPosFrozen;
});
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index cfe069d00bce..270c2120365c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -45,7 +45,7 @@ STATISTIC(NumSimplified, "Number of redundant instructions simplified");
static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, const TargetLibraryInfo &TLI,
MemorySSAUpdater *MSSAU) {
- const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = L.getHeader()->getDataLayout();
SimplifyQuery SQ(DL, &TLI, &DT, &AC);
// On the first pass over the loop body we try to simplify every instruction.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25f..400973fd9fc9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -976,7 +976,7 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
}
if (!findInductions(InnerLoop, InnerLoopInductions)) {
- LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n");
+ LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 5ec387300aac..489f12e689d3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -96,7 +96,7 @@ struct StoreToLoadForwardingCandidate {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadType = getLoadStoreType(Load);
- auto &DL = Load->getParent()->getModule()->getDataLayout();
+ auto &DL = Load->getDataLayout();
assert(LoadPtr->getType()->getPointerAddressSpace() ==
StorePtr->getType()->getPointerAddressSpace() &&
@@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate {
// We don't need to check non-wrapping here because forward/backward
// dependence wouldn't be valid if these weren't monotonic accesses.
- auto *Dist = cast<SCEVConstant>(
+ auto *Dist = dyn_cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
+ if (!Dist)
+ return false;
const APInt &Val = Dist->getAPInt();
return Val == TypeByteSize * StrideLoad;
}
@@ -181,7 +183,8 @@ public:
findStoreToLoadDependences(const LoopAccessInfo &LAI) {
std::forward_list<StoreToLoadForwardingCandidate> Candidates;
- const auto *Deps = LAI.getDepChecker().getDependences();
+ const auto &DepChecker = LAI.getDepChecker();
+ const auto *Deps = DepChecker.getDependences();
if (!Deps)
return Candidates;
@@ -192,8 +195,8 @@ public:
SmallPtrSet<Instruction *, 4> LoadsWithUnknownDepedence;
for (const auto &Dep : *Deps) {
- Instruction *Source = Dep.getSource(LAI);
- Instruction *Destination = Dep.getDestination(LAI);
+ Instruction *Source = Dep.getSource(DepChecker);
+ Instruction *Destination = Dep.getDestination(DepChecker);
if (Dep.Type == MemoryDepChecker::Dependence::Unknown ||
Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
@@ -222,7 +225,7 @@ public:
// Only propagate if the stored values are bit/pointer castable.
if (!CastInst::isBitOrNoopPointerCastable(
getLoadStoreType(Store), getLoadStoreType(Load),
- Store->getParent()->getModule()->getDataLayout()))
+ Store->getDataLayout()))
continue;
Candidates.emplace_front(Load, Store);
@@ -349,19 +352,20 @@ public:
// ld0.
LoadInst *LastLoad =
- std::max_element(Candidates.begin(), Candidates.end(),
- [&](const StoreToLoadForwardingCandidate &A,
- const StoreToLoadForwardingCandidate &B) {
- return getInstrIndex(A.Load) < getInstrIndex(B.Load);
- })
+ llvm::max_element(Candidates,
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Load) <
+ getInstrIndex(B.Load);
+ })
->Load;
StoreInst *FirstStore =
- std::min_element(Candidates.begin(), Candidates.end(),
- [&](const StoreToLoadForwardingCandidate &A,
- const StoreToLoadForwardingCandidate &B) {
- return getInstrIndex(A.Store) <
- getInstrIndex(B.Store);
- })
+ llvm::min_element(Candidates,
+ [&](const StoreToLoadForwardingCandidate &A,
+ const StoreToLoadForwardingCandidate &B) {
+ return getInstrIndex(A.Store) <
+ getInstrIndex(B.Store);
+ })
->Store;
// We're looking for stores after the first forwarding store until the end
@@ -440,9 +444,14 @@ public:
assert(PH && "Preheader should exist!");
Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
PH->getTerminator());
- Value *Initial = new LoadInst(
- Cand.Load->getType(), InitialPtr, "load_initial",
- /* isVolatile */ false, Cand.Load->getAlign(), PH->getTerminator());
+ Value *Initial =
+ new LoadInst(Cand.Load->getType(), InitialPtr, "load_initial",
+ /* isVolatile */ false, Cand.Load->getAlign(),
+ PH->getTerminator()->getIterator());
+ // We don't give any debug location to Initial, because it is inserted
+ // into the loop's preheader. A debug location inside the loop will cause
+ // a misleading stepping when debugging. The test update-debugloc-store
+ // -forwarded.ll checks this.
PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded");
PHI->insertBefore(L->getHeader()->begin());
@@ -450,20 +459,27 @@ public:
Type *LoadType = Initial->getType();
Type *StoreType = Cand.Store->getValueOperand()->getType();
- auto &DL = Cand.Load->getParent()->getModule()->getDataLayout();
+ auto &DL = Cand.Load->getDataLayout();
(void)DL;
assert(DL.getTypeSizeInBits(LoadType) == DL.getTypeSizeInBits(StoreType) &&
"The type sizes should match!");
Value *StoreValue = Cand.Store->getValueOperand();
- if (LoadType != StoreType)
- StoreValue = CastInst::CreateBitOrPointerCast(
- StoreValue, LoadType, "store_forward_cast", Cand.Store);
+ if (LoadType != StoreType) {
+ StoreValue = CastInst::CreateBitOrPointerCast(StoreValue, LoadType,
+ "store_forward_cast",
+ Cand.Store->getIterator());
+ // Because it casts the old `load` value and is used by the new `phi`
+ // which replaces the old `load`, we give the `load`'s debug location
+ // to it.
+ cast<Instruction>(StoreValue)->setDebugLoc(Cand.Load->getDebugLoc());
+ }
PHI->addIncoming(StoreValue, L->getLoopLatch());
Cand.Load->replaceAllUsesWith(PHI);
+ PHI->setDebugLoc(Cand.Load->getDebugLoc());
}
/// Top-level driver for each loop: find store->load forwarding
@@ -601,7 +617,7 @@ public:
// Next, propagate the value stored by the store to the users of the load.
// Also for the first iteration, generate the initial value of the load.
- SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
+ SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getDataLayout(),
"storeforward");
for (const auto &Cand : Candidates)
propagateStoredValueToLoadUsers(Cand, SEE);
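
For orientation, a C++ sketch (illustrative only, not from the patch) of the store-to-load forwarding this file performs, which is where the "load_initial" and "store_forwarded" values touched above come from.

    // Before: A[I] read in iteration I is the value stored to A[I] in the
    // previous iteration (a forward dependence with distance one).
    void before(int *A, const int *B, int N) {
      for (int I = 0; I < N; ++I) {
        int X = A[I];         // Cand.Load
        A[I + 1] = X + B[I];  // Cand.Store
      }
    }

    // After forwarding: the value is carried in a scalar; only the initial
    // element is loaded, and that happens in the preheader.
    void after(int *A, const int *B, int N) {
      int Fwd = A[0];         // "load_initial" in the preheader
      for (int I = 0; I < N; ++I) {
        int X = Fwd;          // "store_forwarded" PHI replacing the load
        int V = X + B[I];
        A[I + 1] = V;
        Fwd = V;              // stored value forwarded to the next iteration
      }
    }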
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
deleted file mode 100644
index 7f62526a4f6d..000000000000
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ /dev/null
@@ -1,1679 +0,0 @@
-//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements a simple loop reroller.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/LoopReroll.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
-#include <map>
-#include <utility>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "loop-reroll"
-
-STATISTIC(NumRerolledLoops, "Number of rerolled loops");
-
-static cl::opt<unsigned>
-NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
- cl::Hidden,
- cl::desc("The maximum number of failures to tolerate"
- " during fuzzy matching. (default: 400)"));
-
-// This loop re-rolling transformation aims to transform loops like this:
-//
-// int foo(int a);
-// void bar(int *x) {
-// for (int i = 0; i < 500; i += 3) {
-// foo(i);
-// foo(i+1);
-// foo(i+2);
-// }
-// }
-//
-// into a loop like this:
-//
-// void bar(int *x) {
-// for (int i = 0; i < 500; ++i)
-// foo(i);
-// }
-//
-// It does this by looking for loops that, besides the latch code, are composed
-// of isomorphic DAGs of instructions, with each DAG rooted at some increment
-// to the induction variable, and where each DAG is isomorphic to the DAG
-// rooted at the induction variable (excepting the sub-DAGs which root the
-// other induction-variable increments). In other words, we're looking for loop
-// bodies of the form:
-//
-// %iv = phi [ (preheader, ...), (body, %iv.next) ]
-// f(%iv)
-// %iv.1 = add %iv, 1 <-- a root increment
-// f(%iv.1)
-// %iv.2 = add %iv, 2 <-- a root increment
-// f(%iv.2)
-// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
-// f(%iv.scale_m_1)
-// ...
-// %iv.next = add %iv, scale
-// %cmp = icmp(%iv, ...)
-// br %cmp, header, exit
-//
-// where each f(i) is a set of instructions that, collectively, are a function
-// only of i (and other loop-invariant values).
-//
-// As a special case, we can also reroll loops like this:
-//
-// int foo(int);
-// void bar(int *x) {
-// for (int i = 0; i < 500; ++i) {
-// x[3*i] = foo(0);
-// x[3*i+1] = foo(0);
-// x[3*i+2] = foo(0);
-// }
-// }
-//
-// into this:
-//
-// void bar(int *x) {
-// for (int i = 0; i < 1500; ++i)
-// x[i] = foo(0);
-// }
-//
-// in which case, we're looking for inputs like this:
-//
-// %iv = phi [ (preheader, ...), (body, %iv.next) ]
-// %scaled.iv = mul %iv, scale
-// f(%scaled.iv)
-// %scaled.iv.1 = add %scaled.iv, 1
-// f(%scaled.iv.1)
-// %scaled.iv.2 = add %scaled.iv, 2
-// f(%scaled.iv.2)
-// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1
-// f(%scaled.iv.scale_m_1)
-// ...
-// %iv.next = add %iv, 1
-// %cmp = icmp(%iv, ...)
-// br %cmp, header, exit
-
-namespace {
-
- enum IterationLimits {
- /// The maximum number of iterations that we'll try and reroll.
- IL_MaxRerollIterations = 32,
- /// The bitvector index used by loop induction variables and other
- /// instructions that belong to all iterations.
- IL_All,
- IL_End
- };
-
- class LoopReroll {
- public:
- LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
- TargetLibraryInfo *TLI, DominatorTree *DT, bool PreserveLCSSA)
- : AA(AA), LI(LI), SE(SE), TLI(TLI), DT(DT),
- PreserveLCSSA(PreserveLCSSA) {}
- bool runOnLoop(Loop *L);
-
- protected:
- AliasAnalysis *AA;
- LoopInfo *LI;
- ScalarEvolution *SE;
- TargetLibraryInfo *TLI;
- DominatorTree *DT;
- bool PreserveLCSSA;
-
- using SmallInstructionVector = SmallVector<Instruction *, 16>;
- using SmallInstructionSet = SmallPtrSet<Instruction *, 16>;
- using TinyInstructionVector = SmallVector<Instruction *, 1>;
-
- // Map between induction variable and its increment
- DenseMap<Instruction *, int64_t> IVToIncMap;
-
- // For loop with multiple induction variables, remember the ones used only to
- // control the loop.
- TinyInstructionVector LoopControlIVs;
-
- // A chain of isomorphic instructions, identified by a single-use PHI
- // representing a reduction. Only the last value may be used outside the
- // loop.
- struct SimpleLoopReduction {
- SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) {
- assert(isa<PHINode>(P) && "First reduction instruction must be a PHI");
- add(L);
- }
-
- bool valid() const {
- return Valid;
- }
-
- Instruction *getPHI() const {
- assert(Valid && "Using invalid reduction");
- return Instructions.front();
- }
-
- Instruction *getReducedValue() const {
- assert(Valid && "Using invalid reduction");
- return Instructions.back();
- }
-
- Instruction *get(size_t i) const {
- assert(Valid && "Using invalid reduction");
- return Instructions[i+1];
- }
-
- Instruction *operator [] (size_t i) const { return get(i); }
-
- // The size, ignoring the initial PHI.
- size_t size() const {
- assert(Valid && "Using invalid reduction");
- return Instructions.size()-1;
- }
-
- using iterator = SmallInstructionVector::iterator;
- using const_iterator = SmallInstructionVector::const_iterator;
-
- iterator begin() {
- assert(Valid && "Using invalid reduction");
- return std::next(Instructions.begin());
- }
-
- const_iterator begin() const {
- assert(Valid && "Using invalid reduction");
- return std::next(Instructions.begin());
- }
-
- iterator end() { return Instructions.end(); }
- const_iterator end() const { return Instructions.end(); }
-
- protected:
- bool Valid = false;
- SmallInstructionVector Instructions;
-
- void add(Loop *L);
- };
-
- // The set of all reductions, and state tracking of possible reductions
- // during loop instruction processing.
- struct ReductionTracker {
- using SmallReductionVector = SmallVector<SimpleLoopReduction, 16>;
-
- // Add a new possible reduction.
- void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); }
-
- // Setup to track possible reductions corresponding to the provided
- // rerolling scale. Only reductions with a number of non-PHI instructions
- // that is divisible by the scale are considered. Three instructions sets
- // are filled in:
- // - A set of all possible instructions in eligible reductions.
- // - A set of all PHIs in eligible reductions
- // - A set of all reduced values (last instructions) in eligible
- // reductions.
- void restrictToScale(uint64_t Scale,
- SmallInstructionSet &PossibleRedSet,
- SmallInstructionSet &PossibleRedPHISet,
- SmallInstructionSet &PossibleRedLastSet) {
- PossibleRedIdx.clear();
- PossibleRedIter.clear();
- Reds.clear();
-
- for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i)
- if (PossibleReds[i].size() % Scale == 0) {
- PossibleRedLastSet.insert(PossibleReds[i].getReducedValue());
- PossibleRedPHISet.insert(PossibleReds[i].getPHI());
-
- PossibleRedSet.insert(PossibleReds[i].getPHI());
- PossibleRedIdx[PossibleReds[i].getPHI()] = i;
- for (Instruction *J : PossibleReds[i]) {
- PossibleRedSet.insert(J);
- PossibleRedIdx[J] = i;
- }
- }
- }
-
- // The functions below are used while processing the loop instructions.
-
- // Are the two instructions both from reductions, and furthermore, from
- // the same reduction?
- bool isPairInSame(Instruction *J1, Instruction *J2) {
- DenseMap<Instruction *, int>::iterator J1I = PossibleRedIdx.find(J1);
- if (J1I != PossibleRedIdx.end()) {
- DenseMap<Instruction *, int>::iterator J2I = PossibleRedIdx.find(J2);
- if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second)
- return true;
- }
-
- return false;
- }
-
- // The two provided instructions, the first from the base iteration, and
- // the second from iteration i, form a matched pair. If these are part of
- // a reduction, record that fact.
- void recordPair(Instruction *J1, Instruction *J2, unsigned i) {
- if (PossibleRedIdx.count(J1)) {
- assert(PossibleRedIdx.count(J2) &&
- "Recording reduction vs. non-reduction instruction?");
-
- PossibleRedIter[J1] = 0;
- PossibleRedIter[J2] = i;
-
- int Idx = PossibleRedIdx[J1];
- assert(Idx == PossibleRedIdx[J2] &&
- "Recording pair from different reductions?");
- Reds.insert(Idx);
- }
- }
-
- // The functions below can be called after we've finished processing all
- // instructions in the loop, and we know which reductions were selected.
-
- bool validateSelected();
- void replaceSelected();
-
- protected:
- // The vector of all possible reductions (for any scale).
- SmallReductionVector PossibleReds;
-
- DenseMap<Instruction *, int> PossibleRedIdx;
- DenseMap<Instruction *, int> PossibleRedIter;
- DenseSet<int> Reds;
- };
-
- // A DAGRootSet models an induction variable being used in a rerollable
- // loop. For example,
- //
- // x[i*3+0] = y1
- // x[i*3+1] = y2
- // x[i*3+2] = y3
- //
- // Base instruction -> i*3
- // +---+----+
- // / | \
- // ST[y1] +1 +2 <-- Roots
- // | |
- // ST[y2] ST[y3]
- //
- // There may be multiple DAGRoots, for example:
- //
- // x[i*2+0] = ... (1)
- // x[i*2+1] = ... (1)
- // x[i*2+4] = ... (2)
- // x[i*2+5] = ... (2)
- // x[(i+1234)*2+5678] = ... (3)
- // x[(i+1234)*2+5679] = ... (3)
- //
- // The loop will be rerolled by adding a new loop induction variable,
- // one for the Base instruction in each DAGRootSet.
- //
- struct DAGRootSet {
- Instruction *BaseInst;
- SmallInstructionVector Roots;
-
- // The instructions between IV and BaseInst (but not including BaseInst).
- SmallInstructionSet SubsumedInsts;
- };
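For a concrete source-level picture of the single-root-set case described in the comment above, here is a hypothetical unrolled-by-3 loop (an illustration only, not a test from this pass). The computation feeding the first store plays the role of BaseInst, and the +1 and +2 address computations are the two Roots.

```cpp
// Hypothetical input matching the x[i*3+k] pattern above: BaseInst is the
// computation for x[i*3+0]; the computations for x[i*3+1] and x[i*3+2]
// are the roots of the single DAGRootSet.
void unrolled_by_3(int *x, int n, int y1, int y2, int y3) {
  for (int i = 0; i < n; ++i) { // n counts groups of three stores
    x[i * 3 + 0] = y1;
    x[i * 3 + 1] = y2;
    x[i * 3 + 2] = y3;
  }
}
```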
-
- // The set of all DAG roots, and state tracking of all roots
- // for a particular induction variable.
- struct DAGRootTracker {
- DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
- ScalarEvolution *SE, AliasAnalysis *AA,
- TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA,
- DenseMap<Instruction *, int64_t> &IncrMap,
- TinyInstructionVector LoopCtrlIVs)
- : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
- PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
- LoopControlIVs(LoopCtrlIVs) {}
-
- /// Stage 1: Find all the DAG roots for the induction variable.
- bool findRoots();
-
- /// Stage 2: Validate if the found roots are valid.
- bool validate(ReductionTracker &Reductions);
-
- /// Stage 3: Assuming validate() returned true, perform the
- /// replacement.
- /// @param BackedgeTakenCount The backedge-taken count of L.
- void replace(const SCEV *BackedgeTakenCount);
-
- protected:
- using UsesTy = MapVector<Instruction *, BitVector>;
-
- void findRootsRecursive(Instruction *IVU,
- SmallInstructionSet SubsumedInsts);
- bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
- bool collectPossibleRoots(Instruction *Base,
- std::map<int64_t,Instruction*> &Roots);
- bool validateRootSet(DAGRootSet &DRS);
-
- bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
- void collectInLoopUserSet(const SmallInstructionVector &Roots,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users);
- void collectInLoopUserSet(Instruction *Root,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users);
-
- UsesTy::iterator nextInstr(int Val, UsesTy &In,
- const SmallInstructionSet &Exclude,
- UsesTy::iterator *StartI=nullptr);
- bool isBaseInst(Instruction *I);
- bool isRootInst(Instruction *I);
- bool instrDependsOn(Instruction *I,
- UsesTy::iterator Start,
- UsesTy::iterator End);
- void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr);
-
- LoopReroll *Parent;
-
- // Members of Parent, replicated here for brevity.
- Loop *L;
- ScalarEvolution *SE;
- AliasAnalysis *AA;
- TargetLibraryInfo *TLI;
- DominatorTree *DT;
- LoopInfo *LI;
- bool PreserveLCSSA;
-
- // The loop induction variable.
- Instruction *IV;
-
- // Loop step amount.
- int64_t Inc;
-
- // Loop reroll count; if Inc == 1, this records the scaling applied
- // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ;
- // If Inc is not 1, Scale = Inc.
- uint64_t Scale;
-
- // The roots themselves.
- SmallVector<DAGRootSet,16> RootSets;
-
- // All increment instructions for IV.
- SmallInstructionVector LoopIncs;
-
- // Map of all instructions in the loop (in order) to the iterations
- // they are used in (or specially, IL_All for instructions
- // used in the loop increment mechanism).
- UsesTy Uses;
-
- // Map between induction variable and its increment
- DenseMap<Instruction *, int64_t> &IVToIncMap;
-
- TinyInstructionVector LoopControlIVs;
- };
-
- // Check if it is a compare-like instruction whose user is a branch
- bool isCompareUsedByBranch(Instruction *I) {
- auto *TI = I->getParent()->getTerminator();
- if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
- return false;
- return I->hasOneUse() && TI->getOperand(0) == I;
- };
-
- bool isLoopControlIV(Loop *L, Instruction *IV);
- void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
- void collectPossibleReductions(Loop *L,
- ReductionTracker &Reductions);
- bool reroll(Instruction *IV, Loop *L, BasicBlock *Header,
- const SCEV *BackedgeTakenCount, ReductionTracker &Reductions);
- };
-
-} // end anonymous namespace
-
-// Returns true if the provided instruction is used outside the given loop.
-// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
-// non-loop blocks to be outside the loop.
-static bool hasUsesOutsideLoop(Instruction *I, Loop *L) {
- for (User *U : I->users()) {
- if (!L->contains(cast<Instruction>(U)))
- return true;
- }
- return false;
-}
-
-// Check if an IV is only used to control the loop. There are two cases:
-// 1. It only has one use, which is the loop increment, and the increment is
-// only used by the comparison and the PHI (possibly with a sext of an nsw
-// value in between), and the comparison is only used by the branch.
-// 2. It is used by the loop increment and the comparison, the loop increment
-// is only used by the PHI, and the comparison is used only by the branch.
-bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
- unsigned IVUses = IV->getNumUses();
- if (IVUses != 2 && IVUses != 1)
- return false;
-
- for (auto *User : IV->users()) {
- int32_t IncOrCmpUses = User->getNumUses();
- bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
-
- // User can only have one or two uses.
- if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
- return false;
-
- // Case 1
- if (IVUses == 1) {
- // The only user must be the loop increment.
- // The loop increment must have two uses.
- if (IsCompInst || IncOrCmpUses != 2)
- return false;
- }
-
- // Case 2
- if (IVUses == 2 && IncOrCmpUses != 1)
- return false;
-
-    // Each user of the IV must be a binary operation or a comparison
- if (auto *BO = dyn_cast<BinaryOperator>(User)) {
- if (BO->getOpcode() == Instruction::Add) {
- // Loop Increment
- // User of Loop Increment should be either PHI or CMP
- for (auto *UU : User->users()) {
- if (PHINode *PN = dyn_cast<PHINode>(UU)) {
- if (PN != IV)
- return false;
- }
- // Must be a CMP or an ext (of a value with nsw) then CMP
- else {
- auto *UUser = cast<Instruction>(UU);
- // Skip SExt if we are extending an nsw value
- // TODO: Allow ZExt too
- if (BO->hasNoSignedWrap() && UUser->hasOneUse() &&
- isa<SExtInst>(UUser))
- UUser = cast<Instruction>(*(UUser->user_begin()));
- if (!isCompareUsedByBranch(UUser))
- return false;
- }
- }
- } else
- return false;
-      // Compare: can only have one use, and that use must be the branch
- } else if (!IsCompInst)
- return false;
- }
- return true;
-}
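As a rough source-level illustration of the shape isLoopControlIV accepts (an assumption about typical input, not a case taken from the pass or its tests): a counter that feeds only its own increment and the exit comparison, while a separate induction variable drives the rerollable stores.

```cpp
// 'i' is used only by its increment and (via the increment) the exit
// comparison, so it is a loop-control-only IV; 'p' is the induction
// variable whose two stores per iteration are candidates for rerolling.
void control_only_iv(int *p, int n) {
  for (int i = 0; i < n; ++i) { // n counts pairs of stores
    p[0] = 0;
    p[1] = 0;
    p += 2;
  }
}
```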
-
-// Collect the list of loop induction variables with respect to which it might
-// be possible to reroll the loop.
-void LoopReroll::collectPossibleIVs(Loop *L,
- SmallInstructionVector &PossibleIVs) {
- for (Instruction &IV : L->getHeader()->phis()) {
- if (!IV.getType()->isIntegerTy() && !IV.getType()->isPointerTy())
- continue;
-
- if (const SCEVAddRecExpr *PHISCEV =
- dyn_cast<SCEVAddRecExpr>(SE->getSCEV(&IV))) {
- if (PHISCEV->getLoop() != L)
- continue;
- if (!PHISCEV->isAffine())
- continue;
- const auto *IncSCEV = dyn_cast<SCEVConstant>(PHISCEV->getStepRecurrence(*SE));
- if (IncSCEV) {
- IVToIncMap[&IV] = IncSCEV->getValue()->getSExtValue();
- LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << IV << " = " << *PHISCEV
- << "\n");
-
- if (isLoopControlIV(L, &IV)) {
- LoopControlIVs.push_back(&IV);
- LLVM_DEBUG(dbgs() << "LRR: Loop control only IV: " << IV
- << " = " << *PHISCEV << "\n");
- } else
- PossibleIVs.push_back(&IV);
- }
- }
- }
-}
-
-// Add the remainder of the reduction-variable chain to the instruction vector
-// (the initial PHINode has already been added). If successful, the object is
-// marked as valid.
-void LoopReroll::SimpleLoopReduction::add(Loop *L) {
- assert(!Valid && "Cannot add to an already-valid chain");
-
- // The reduction variable must be a chain of single-use instructions
- // (including the PHI), except for the last value (which is used by the PHI
- // and also outside the loop).
- Instruction *C = Instructions.front();
- if (C->user_empty())
- return;
-
- do {
- C = cast<Instruction>(*C->user_begin());
- if (C->hasOneUse()) {
- if (!C->isBinaryOp())
- return;
-
- if (!(isa<PHINode>(Instructions.back()) ||
- C->isSameOperationAs(Instructions.back())))
- return;
-
- Instructions.push_back(C);
- }
- } while (C->hasOneUse());
-
- if (Instructions.size() < 2 ||
- !C->isSameOperationAs(Instructions.back()) ||
- C->use_empty())
- return;
-
- // C is now the (potential) last instruction in the reduction chain.
- for (User *U : C->users()) {
- // The only in-loop user can be the initial PHI.
- if (L->contains(cast<Instruction>(U)))
- if (cast<Instruction>(U) != Instructions.front())
- return;
- }
-
- Instructions.push_back(C);
- Valid = true;
-}
-
-// Collect the vector of possible reduction variables.
-void LoopReroll::collectPossibleReductions(Loop *L,
- ReductionTracker &Reductions) {
- BasicBlock *Header = L->getHeader();
- for (BasicBlock::iterator I = Header->begin(),
- IE = Header->getFirstInsertionPt(); I != IE; ++I) {
- if (!isa<PHINode>(I))
- continue;
- if (!I->getType()->isSingleValueType())
- continue;
-
- SimpleLoopReduction SLR(&*I, L);
- if (!SLR.valid())
- continue;
-
- LLVM_DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with "
- << SLR.size() << " chained instructions)\n");
- Reductions.addSLR(SLR);
- }
-}
-
-// Collect the set of all users of the provided root instruction. This set of
-// users contains not only the direct users of the root instruction, but also
-// all users of those users, and so on. There are two exceptions:
-//
-// 1. Instructions in the set of excluded instructions are never added to the
-// use set (even if they are users). This is used, for example, to keep root
-// increments out of the use set of the primary IV.
-//
-// 2. Instructions in the set of final instructions are added to the use set
-// if they are users, but their users are not added. This is used, for
-// example, to prevent a reduction update from forcing all later reduction
-// updates into the use set.
-void LoopReroll::DAGRootTracker::collectInLoopUserSet(
- Instruction *Root, const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users) {
- SmallInstructionVector Queue(1, Root);
- while (!Queue.empty()) {
- Instruction *I = Queue.pop_back_val();
- if (!Users.insert(I).second)
- continue;
-
- if (!Final.count(I))
- for (Use &U : I->uses()) {
- Instruction *User = cast<Instruction>(U.getUser());
- if (PHINode *PN = dyn_cast<PHINode>(User)) {
- // Ignore "wrap-around" uses to PHIs of this loop's header.
- if (PN->getIncomingBlock(U) == L->getHeader())
- continue;
- }
-
- if (L->contains(User) && !Exclude.count(User)) {
- Queue.push_back(User);
- }
- }
-
- // We also want to collect single-user "feeder" values.
- for (Use &U : I->operands()) {
- if (Instruction *Op = dyn_cast<Instruction>(U))
- if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) &&
- !Final.count(Op))
- Queue.push_back(Op);
- }
- }
-}
-
-// Collect all of the users of all of the provided root instructions (combined
-// into a single set).
-void LoopReroll::DAGRootTracker::collectInLoopUserSet(
- const SmallInstructionVector &Roots,
- const SmallInstructionSet &Exclude,
- const SmallInstructionSet &Final,
- DenseSet<Instruction *> &Users) {
- for (Instruction *Root : Roots)
- collectInLoopUserSet(Root, Exclude, Final, Users);
-}
-
-static bool isUnorderedLoadStore(Instruction *I) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isUnordered();
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isUnordered();
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return !MI->isVolatile();
- return false;
-}
-
-/// Return true if IVU is a "simple" arithmetic operation.
-/// This is used for narrowing the search space for DAGRoots; only arithmetic
-/// and GEPs can be part of a DAGRoot.
-static bool isSimpleArithmeticOp(User *IVU) {
- if (Instruction *I = dyn_cast<Instruction>(IVU)) {
- switch (I->getOpcode()) {
- default: return false;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::Shl:
- case Instruction::AShr:
- case Instruction::LShr:
- case Instruction::GetElementPtr:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- return true;
- }
- }
- return false;
-}
-
-static bool isLoopIncrement(User *U, Instruction *IV) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(U);
-
- if ((BO && BO->getOpcode() != Instruction::Add) ||
- (!BO && !isa<GetElementPtrInst>(U)))
- return false;
-
- for (auto *UU : U->users()) {
- PHINode *PN = dyn_cast<PHINode>(UU);
- if (PN && PN == IV)
- return true;
- }
- return false;
-}
-
-bool LoopReroll::DAGRootTracker::
-collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
- SmallInstructionVector BaseUsers;
-
- for (auto *I : Base->users()) {
- ConstantInt *CI = nullptr;
-
- if (isLoopIncrement(I, IV)) {
- LoopIncs.push_back(cast<Instruction>(I));
- continue;
- }
-
- // The root nodes must be either GEPs, ORs or ADDs.
- if (auto *BO = dyn_cast<BinaryOperator>(I)) {
- if (BO->getOpcode() == Instruction::Add ||
- BO->getOpcode() == Instruction::Or)
- CI = dyn_cast<ConstantInt>(BO->getOperand(1));
- } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
- Value *LastOperand = GEP->getOperand(GEP->getNumOperands()-1);
- CI = dyn_cast<ConstantInt>(LastOperand);
- }
-
- if (!CI) {
- if (Instruction *II = dyn_cast<Instruction>(I)) {
- BaseUsers.push_back(II);
- continue;
- } else {
- LLVM_DEBUG(dbgs() << "LRR: Aborting due to non-instruction: " << *I
- << "\n");
- return false;
- }
- }
-
- int64_t V = std::abs(CI->getValue().getSExtValue());
- if (Roots.find(V) != Roots.end())
- // No duplicates, please.
- return false;
-
- Roots[V] = cast<Instruction>(I);
- }
-
- // Make sure we have at least two roots.
- if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
- return false;
-
- // If we found non-loop-inc, non-root users of Base, assume they are
- // for the zeroth root index. This is because "add %a, 0" gets optimized
- // away.
- if (BaseUsers.size()) {
- if (Roots.find(0) != Roots.end()) {
- LLVM_DEBUG(dbgs() << "LRR: Multiple roots found for base - aborting!\n");
- return false;
- }
- Roots[0] = Base;
- }
-
- // Calculate the number of users of the base, or lowest indexed, iteration.
- unsigned NumBaseUses = BaseUsers.size();
- if (NumBaseUses == 0)
- NumBaseUses = Roots.begin()->second->getNumUses();
-
- // Check that every node has the same number of users.
- for (auto &KV : Roots) {
- if (KV.first == 0)
- continue;
- if (!KV.second->hasNUses(NumBaseUses)) {
- LLVM_DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: "
- << "#Base=" << NumBaseUses
- << ", #Root=" << KV.second->getNumUses() << "\n");
- return false;
- }
- }
-
- return true;
-}
-
-void LoopReroll::DAGRootTracker::
-findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
- // Does the user look like it could be part of a root set?
- // All its users must be simple arithmetic ops.
- if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1))
- return;
-
- if (I != IV && findRootsBase(I, SubsumedInsts))
- return;
-
- SubsumedInsts.insert(I);
-
- for (User *V : I->users()) {
- Instruction *I = cast<Instruction>(V);
- if (is_contained(LoopIncs, I))
- continue;
-
- if (!isSimpleArithmeticOp(I))
- continue;
-
- // The recursive call makes a copy of SubsumedInsts.
- findRootsRecursive(I, SubsumedInsts);
- }
-}
-
-bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
- if (DRS.Roots.empty())
- return false;
-
- // If the value of the base instruction is used outside the loop, we cannot
-  // reroll the loop. Checking the other root instructions is unnecessary
-  // because they don't match any base instructions if their values are used
-  // outside the loop.
- if (hasUsesOutsideLoop(DRS.BaseInst, L))
- return false;
-
- // Consider a DAGRootSet with N-1 roots (so N different values including
- // BaseInst).
- // Define d = Roots[0] - BaseInst, which should be the same as
- // Roots[I] - Roots[I-1] for all I in [1..N).
- // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
- // loop iteration J.
- //
-  // Now, for the loop iterations to be consecutive:
- // D = d * N
- const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
- if (!ADR)
- return false;
-
- // Check that the first root is evenly spaced.
- unsigned N = DRS.Roots.size() + 1;
- const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
- if (isa<SCEVCouldNotCompute>(StepSCEV) || StepSCEV->getType()->isPointerTy())
- return false;
- const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
- if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
- return false;
-
-  // Check that the remaining roots are evenly spaced.
- for (unsigned i = 1; i < N - 1; ++i) {
- const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
- SE->getSCEV(DRS.Roots[i-1]));
- if (NewStepSCEV != StepSCEV)
- return false;
- }
-
- return true;
-}
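A small worked check of the D = d * N condition above, reusing the x[i*3+k] example (purely illustrative): with N = 3 values per iteration and root spacing d = 1, the base must step by D = 3 so that consecutive iterations cover adjacent index ranges.

```cpp
#include <cassert>

int main() {
  const long N = 3; // BaseInst plus two roots
  const long d = 1; // spacing between consecutive roots
  const long D = 3; // per-iteration step of the base expression i*3
  assert(D == d * N); // the consecutiveness condition checked above
  // Equivalently: the last index touched by iteration i is immediately
  // followed by the first index touched by iteration i+1.
  for (long i = 0; i < 4; ++i)
    assert(i * D + (N - 1) * d + d == (i + 1) * D);
  return 0;
}
```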
-
-bool LoopReroll::DAGRootTracker::
-findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
- // The base of a RootSet must be an AddRec, so it can be erased.
- const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
- if (!IVU_ADR || IVU_ADR->getLoop() != L)
- return false;
-
- std::map<int64_t, Instruction*> V;
- if (!collectPossibleRoots(IVU, V))
- return false;
-
- // If we didn't get a root for index zero, then IVU must be
- // subsumed.
- if (V.find(0) == V.end())
- SubsumedInsts.insert(IVU);
-
- // Partition the vector into monotonically increasing indexes.
- DAGRootSet DRS;
- DRS.BaseInst = nullptr;
-
- SmallVector<DAGRootSet, 16> PotentialRootSets;
-
- for (auto &KV : V) {
- if (!DRS.BaseInst) {
- DRS.BaseInst = KV.second;
- DRS.SubsumedInsts = SubsumedInsts;
- } else if (DRS.Roots.empty()) {
- DRS.Roots.push_back(KV.second);
- } else if (V.find(KV.first - 1) != V.end()) {
- DRS.Roots.push_back(KV.second);
- } else {
- // Linear sequence terminated.
- if (!validateRootSet(DRS))
- return false;
-
- // Construct a new DAGRootSet with the next sequence.
- PotentialRootSets.push_back(DRS);
- DRS.BaseInst = KV.second;
- DRS.Roots.clear();
- }
- }
-
- if (!validateRootSet(DRS))
- return false;
-
- PotentialRootSets.push_back(DRS);
-
- RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
-
- return true;
-}
-
-bool LoopReroll::DAGRootTracker::findRoots() {
- Inc = IVToIncMap[IV];
-
- assert(RootSets.empty() && "Unclean state!");
- if (std::abs(Inc) == 1) {
- for (auto *IVU : IV->users()) {
- if (isLoopIncrement(IVU, IV))
- LoopIncs.push_back(cast<Instruction>(IVU));
- }
- findRootsRecursive(IV, SmallInstructionSet());
- LoopIncs.push_back(IV);
- } else {
- if (!findRootsBase(IV, SmallInstructionSet()))
- return false;
- }
-
- // Ensure all sets have the same size.
- if (RootSets.empty()) {
- LLVM_DEBUG(dbgs() << "LRR: Aborting because no root sets found!\n");
- return false;
- }
- for (auto &V : RootSets) {
- if (V.Roots.empty() || V.Roots.size() != RootSets[0].Roots.size()) {
- LLVM_DEBUG(
- dbgs()
- << "LRR: Aborting because not all root sets have the same size\n");
- return false;
- }
- }
-
- Scale = RootSets[0].Roots.size() + 1;
-
- if (Scale > IL_MaxRerollIterations) {
- LLVM_DEBUG(dbgs() << "LRR: Aborting - too many iterations found. "
- << "#Found=" << Scale
- << ", #Max=" << IL_MaxRerollIterations << "\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "LRR: Successfully found roots: Scale=" << Scale
- << "\n");
-
- return true;
-}
-
-bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &PossibleRedSet) {
- // Populate the MapVector with all instructions in the block, in order first,
- // so we can iterate over the contents later in perfect order.
- for (auto &I : *L->getHeader()) {
- Uses[&I].resize(IL_End);
- }
-
- SmallInstructionSet Exclude;
- for (auto &DRS : RootSets) {
- Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
- Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
- Exclude.insert(DRS.BaseInst);
- }
- Exclude.insert(LoopIncs.begin(), LoopIncs.end());
-
- for (auto &DRS : RootSets) {
- DenseSet<Instruction*> VBase;
- collectInLoopUserSet(DRS.BaseInst, Exclude, PossibleRedSet, VBase);
- for (auto *I : VBase) {
- Uses[I].set(0);
- }
-
- unsigned Idx = 1;
- for (auto *Root : DRS.Roots) {
- DenseSet<Instruction*> V;
- collectInLoopUserSet(Root, Exclude, PossibleRedSet, V);
-
- // While we're here, check the use sets are the same size.
- if (V.size() != VBase.size()) {
- LLVM_DEBUG(dbgs() << "LRR: Aborting - use sets are different sizes\n");
- return false;
- }
-
- for (auto *I : V) {
- Uses[I].set(Idx);
- }
- ++Idx;
- }
-
- // Make sure our subsumed instructions are remembered too.
- for (auto *I : DRS.SubsumedInsts) {
- Uses[I].set(IL_All);
- }
- }
-
- // Make sure the loop increments are also accounted for.
-
- Exclude.clear();
- for (auto &DRS : RootSets) {
- Exclude.insert(DRS.Roots.begin(), DRS.Roots.end());
- Exclude.insert(DRS.SubsumedInsts.begin(), DRS.SubsumedInsts.end());
- Exclude.insert(DRS.BaseInst);
- }
-
- DenseSet<Instruction*> V;
- collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
- for (auto *I : V) {
- if (I->mayHaveSideEffects()) {
- LLVM_DEBUG(dbgs() << "LRR: Aborting - "
- << "An instruction which does not belong to any root "
- << "sets must not have side effects: " << *I);
- return false;
- }
- Uses[I].set(IL_All);
- }
-
- return true;
-}
-
-/// Get the next instruction in "In" that is a member of set Val.
-/// Start searching from StartI, and do not return anything in Exclude.
-/// If StartI is not given, start from In.begin().
-LoopReroll::DAGRootTracker::UsesTy::iterator
-LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
- const SmallInstructionSet &Exclude,
- UsesTy::iterator *StartI) {
- UsesTy::iterator I = StartI ? *StartI : In.begin();
- while (I != In.end() && (I->second.test(Val) == 0 ||
- Exclude.contains(I->first)))
- ++I;
- return I;
-}
-
-bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
- for (auto &DRS : RootSets) {
- if (DRS.BaseInst == I)
- return true;
- }
- return false;
-}
-
-bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
- for (auto &DRS : RootSets) {
- if (is_contained(DRS.Roots, I))
- return true;
- }
- return false;
-}
-
-/// Return true if instruction I depends on any instruction between
-/// Start and End.
-bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
- UsesTy::iterator Start,
- UsesTy::iterator End) {
- for (auto *U : I->users()) {
- for (auto It = Start; It != End; ++It)
- if (U == It->first)
- return true;
- }
- return false;
-}
-
-static bool isIgnorableInst(const Instruction *I) {
- if (isa<DbgInfoIntrinsic>(I))
- return true;
- const IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
- if (!II)
- return false;
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::annotation:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- // TODO: the following intrinsics may also be allowed:
- // lifetime_start, lifetime_end, invariant_start, invariant_end
- return true;
- }
- return false;
-}
-
-bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
- // We now need to check for equivalence of the use graph of each root with
- // that of the primary induction variable (excluding the roots). Our goal
- // here is not to solve the full graph isomorphism problem, but rather to
- // catch common cases without a lot of work. As a result, we will assume
- // that the relative order of the instructions in each unrolled iteration
- // is the same (although we will not make an assumption about how the
- // different iterations are intermixed). Note that while the order must be
-  // the same, the instructions need not be in the same basic block.
-
- // An array of just the possible reductions for this scale factor. When we
- // collect the set of all users of some root instructions, these reduction
- // instructions are treated as 'final' (their uses are not considered).
- // This is important because we don't want the root use set to search down
- // the reduction chain.
- SmallInstructionSet PossibleRedSet;
- SmallInstructionSet PossibleRedLastSet;
- SmallInstructionSet PossibleRedPHISet;
- Reductions.restrictToScale(Scale, PossibleRedSet,
- PossibleRedPHISet, PossibleRedLastSet);
-
- // Populate "Uses" with where each instruction is used.
- if (!collectUsedInstructions(PossibleRedSet))
- return false;
-
- // Make sure we mark the reduction PHIs as used in all iterations.
- for (auto *I : PossibleRedPHISet) {
- Uses[I].set(IL_All);
- }
-
- // Make sure we mark loop-control-only PHIs as used in all iterations. See
- // comment above LoopReroll::isLoopControlIV for more information.
- BasicBlock *Header = L->getHeader();
- for (Instruction *LoopControlIV : LoopControlIVs) {
- for (auto *U : LoopControlIV->users()) {
- Instruction *IVUser = dyn_cast<Instruction>(U);
- // IVUser could be loop increment or compare
- Uses[IVUser].set(IL_All);
- for (auto *UU : IVUser->users()) {
- Instruction *UUser = dyn_cast<Instruction>(UU);
- // UUser could be compare, PHI or branch
- Uses[UUser].set(IL_All);
- // Skip SExt
- if (isa<SExtInst>(UUser)) {
- UUser = dyn_cast<Instruction>(*(UUser->user_begin()));
- Uses[UUser].set(IL_All);
- }
- // Is UUser a compare instruction?
- if (UU->hasOneUse()) {
- Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
- if (BI == cast<BranchInst>(Header->getTerminator()))
- Uses[BI].set(IL_All);
- }
- }
- }
- }
-
- // Make sure all instructions in the loop are in one and only one
- // set.
- for (auto &KV : Uses) {
- if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) {
- LLVM_DEBUG(
- dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: "
- << *KV.first << " (#uses=" << KV.second.count() << ")\n");
- return false;
- }
- }
-
- LLVM_DEBUG(for (auto &KV
- : Uses) {
- dbgs() << "LRR: " << KV.second.find_first() << "\t" << *KV.first << "\n";
- });
-
- BatchAAResults BatchAA(*AA);
- for (unsigned Iter = 1; Iter < Scale; ++Iter) {
- // In addition to regular aliasing information, we need to look for
- // instructions from later (future) iterations that have side effects
- // preventing us from reordering them past other instructions with side
- // effects.
- bool FutureSideEffects = false;
- AliasSetTracker AST(BatchAA);
- // The map between instructions in f(%iv.(i+1)) and f(%iv).
- DenseMap<Value *, Value *> BaseMap;
-
- // Compare iteration Iter to the base.
- SmallInstructionSet Visited;
- auto BaseIt = nextInstr(0, Uses, Visited);
- auto RootIt = nextInstr(Iter, Uses, Visited);
- auto LastRootIt = Uses.begin();
-
- while (BaseIt != Uses.end() && RootIt != Uses.end()) {
- Instruction *BaseInst = BaseIt->first;
- Instruction *RootInst = RootIt->first;
-
- // Skip over the IV or root instructions; only match their users.
- bool Continue = false;
- if (isBaseInst(BaseInst)) {
- Visited.insert(BaseInst);
- BaseIt = nextInstr(0, Uses, Visited);
- Continue = true;
- }
- if (isRootInst(RootInst)) {
- LastRootIt = RootIt;
- Visited.insert(RootInst);
- RootIt = nextInstr(Iter, Uses, Visited);
- Continue = true;
- }
- if (Continue) continue;
-
- if (!BaseInst->isSameOperationAs(RootInst)) {
-        // Last chance saloon. We don't try to solve the full isomorphism
-        // problem, but try to at least catch the case where two instructions
-        // *of different types* are the wrong way round. We won't be able to
- // efficiently tell, given two ADD instructions, which way around we
- // should match them, but given an ADD and a SUB, we can at least infer
- // which one is which.
- //
- // This should allow us to deal with a greater subset of the isomorphism
- // problem. It does however change a linear algorithm into a quadratic
- // one, so limit the number of probes we do.
- auto TryIt = RootIt;
- unsigned N = NumToleratedFailedMatches;
- while (TryIt != Uses.end() &&
- !BaseInst->isSameOperationAs(TryIt->first) &&
- N--) {
- ++TryIt;
- TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
- }
-
- if (TryIt == Uses.end() || TryIt == RootIt ||
- instrDependsOn(TryIt->first, RootIt, TryIt)) {
- LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
- << *BaseInst << " vs. " << *RootInst << "\n");
- return false;
- }
-
- RootIt = TryIt;
- RootInst = TryIt->first;
- }
-
- // All instructions between the last root and this root
- // may belong to some other iteration. If they belong to a
- // future iteration, then they're dangerous to alias with.
- //
- // Note that because we allow a limited amount of flexibility in the order
- // that we visit nodes, LastRootIt might be *before* RootIt, in which
- // case we've already checked this set of instructions so we shouldn't
- // do anything.
- for (; LastRootIt < RootIt; ++LastRootIt) {
- Instruction *I = LastRootIt->first;
- if (LastRootIt->second.find_first() < (int)Iter)
- continue;
- if (I->mayWriteToMemory())
- AST.add(I);
- // Note: This is specifically guarded by a check on isa<PHINode>,
-        // which, while a valid (somewhat arbitrary) micro-optimization, is
- // needed because otherwise isSafeToSpeculativelyExecute returns
- // false on PHI nodes.
- if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
- !isSafeToSpeculativelyExecute(I))
- // Intervening instructions cause side effects.
- FutureSideEffects = true;
- }
-
- // Make sure that this instruction, which is in the use set of this
- // root instruction, does not also belong to the base set or the set of
- // some other root instruction.
- if (RootIt->second.count() > 1) {
- LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
- << " vs. " << *RootInst << " (prev. case overlap)\n");
- return false;
- }
-
- // Make sure that we don't alias with any instruction in the alias set
- // tracker. If we do, then we depend on a future iteration, and we
- // can't reroll.
- if (RootInst->mayReadFromMemory()) {
- for (auto &K : AST) {
- if (isModOrRefSet(K.aliasesUnknownInst(RootInst, BatchAA))) {
- LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at "
- << *BaseInst << " vs. " << *RootInst
- << " (depends on future store)\n");
- return false;
- }
- }
- }
-
-      // If we've passed an instruction from a future iteration that may have
- // side effects, and this instruction might also, then we can't reorder
- // them, and this matching fails. As an exception, we allow the alias
- // set tracker to handle regular (unordered) load/store dependencies.
- if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
- !isSafeToSpeculativelyExecute(BaseInst)) ||
- (!isUnorderedLoadStore(RootInst) &&
- !isSafeToSpeculativelyExecute(RootInst)))) {
- LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
- << " vs. " << *RootInst
- << " (side effects prevent reordering)\n");
- return false;
- }
-
- // For instructions that are part of a reduction, if the operation is
- // associative, then don't bother matching the operands (because we
- // already know that the instructions are isomorphic, and the order
- // within the iteration does not matter). For non-associative reductions,
- // we do need to match the operands, because we need to reject
- // out-of-order instructions within an iteration!
- // For example (assume floating-point addition), we need to reject this:
- // x += a[i]; x += b[i];
- // x += a[i+1]; x += b[i+1];
- // x += b[i+2]; x += a[i+2];
- bool InReduction = Reductions.isPairInSame(BaseInst, RootInst);
-
- if (!(InReduction && BaseInst->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;
- for (unsigned j = 0; j < BaseInst->getNumOperands(); ++j) {
- Value *Op2 = RootInst->getOperand(j);
-
- // If this is part of a reduction (and the operation is not
-          // associative), then we match all operands, but not those that are
- // part of the reduction.
- if (InReduction)
- if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
- if (Reductions.isPairInSame(RootInst, Op2I))
- continue;
-
- DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
- if (BMI != BaseMap.end()) {
- Op2 = BMI->second;
- } else {
- for (auto &DRS : RootSets) {
- if (DRS.Roots[Iter-1] == (Instruction*) Op2) {
- Op2 = DRS.BaseInst;
- break;
- }
- }
- }
-
- if (BaseInst->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
- // If we've not already decided to swap the matched operands, and
- // we've not already matched our first operand (note that we could
- // have skipped matching the first operand because it is part of a
- // reduction above), and the instruction is commutative, then try
- // the swapped match.
- if (!Swapped && BaseInst->isCommutative() && !SomeOpMatched &&
- BaseInst->getOperand(!j) == Op2) {
- Swapped = true;
- } else {
- LLVM_DEBUG(dbgs()
- << "LRR: iteration root match failed at " << *BaseInst
- << " vs. " << *RootInst << " (operand " << j << ")\n");
- return false;
- }
- }
-
- SomeOpMatched = true;
- }
- }
-
- if ((!PossibleRedLastSet.count(BaseInst) &&
- hasUsesOutsideLoop(BaseInst, L)) ||
- (!PossibleRedLastSet.count(RootInst) &&
- hasUsesOutsideLoop(RootInst, L))) {
- LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst
- << " vs. " << *RootInst << " (uses outside loop)\n");
- return false;
- }
-
- Reductions.recordPair(BaseInst, RootInst, Iter);
- BaseMap.insert(std::make_pair(RootInst, BaseInst));
-
- LastRootIt = RootIt;
- Visited.insert(BaseInst);
- Visited.insert(RootInst);
- BaseIt = nextInstr(0, Uses, Visited);
- RootIt = nextInstr(Iter, Uses, Visited);
- }
- assert(BaseIt == Uses.end() && RootIt == Uses.end() &&
- "Mismatched set sizes!");
- }
-
- LLVM_DEBUG(dbgs() << "LRR: Matched all iteration increments for " << *IV
- << "\n");
-
- return true;
-}
-
-void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) {
- BasicBlock *Header = L->getHeader();
-
- // Compute the start and increment for each BaseInst before we start erasing
- // instructions.
- SmallVector<const SCEV *, 8> StartExprs;
- SmallVector<const SCEV *, 8> IncrExprs;
- for (auto &DRS : RootSets) {
- const SCEVAddRecExpr *IVSCEV =
- cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
- StartExprs.push_back(IVSCEV->getStart());
- IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV));
- }
-
- // Remove instructions associated with non-base iterations.
- for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*Header))) {
- unsigned I = Uses[&Inst].find_first();
- if (I > 0 && I < IL_All) {
- LLVM_DEBUG(dbgs() << "LRR: removing: " << Inst << "\n");
- Inst.eraseFromParent();
- }
- }
-
- // Rewrite each BaseInst using SCEV.
- for (size_t i = 0, e = RootSets.size(); i != e; ++i)
- // Insert the new induction variable.
- replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]);
-
- { // Limit the lifetime of SCEVExpander.
- BranchInst *BI = cast<BranchInst>(Header->getTerminator());
- const DataLayout &DL = Header->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "reroll");
- auto Zero = SE->getZero(BackedgeTakenCount->getType());
- auto One = SE->getOne(BackedgeTakenCount->getType());
- auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap);
- Value *NewIV =
- Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(),
- Header->getFirstNonPHIOrDbg());
- // FIXME: This arithmetic can overflow.
- auto TripCount = SE->getAddExpr(BackedgeTakenCount, One);
- auto ScaledTripCount = SE->getMulExpr(
- TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale));
- auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One);
- Value *TakenCount =
- Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(),
- Header->getFirstNonPHIOrDbg());
- Value *Cond =
- new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond");
- BI->setCondition(Cond);
-
- if (BI->getSuccessor(1) != Header)
- BI->swapSuccessors();
- }
-
- SimplifyInstructionsInBlock(Header, TLI);
- DeleteDeadPHIs(Header, TLI);
-}
-
-void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS,
- const SCEV *Start,
- const SCEV *IncrExpr) {
- BasicBlock *Header = L->getHeader();
- Instruction *Inst = DRS.BaseInst;
-
- const SCEV *NewIVSCEV =
- SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap);
-
- { // Limit the lifetime of SCEVExpander.
- const DataLayout &DL = Header->getModule()->getDataLayout();
- SCEVExpander Expander(*SE, DL, "reroll");
- Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
- Header->getFirstNonPHIOrDbg());
-
- for (auto &KV : Uses)
- if (KV.second.find_first() == 0)
- KV.first->replaceUsesOfWith(Inst, NewIV);
- }
-}
-
-// Validate the selected reductions. All iterations must have an isomorphic
-// part of the reduction chain and, for non-associative reductions, the chain
-// entries must appear in order.
-bool LoopReroll::ReductionTracker::validateSelected() {
- // For a non-associative reduction, the chain entries must appear in order.
- for (int i : Reds) {
- int PrevIter = 0, BaseCount = 0, Count = 0;
- for (Instruction *J : PossibleReds[i]) {
- // Note that all instructions in the chain must have been found because
- // all instructions in the function must have been assigned to some
- // iteration.
- int Iter = PossibleRedIter[J];
- if (Iter != PrevIter && Iter != PrevIter + 1 &&
- !PossibleReds[i].getReducedValue()->isAssociative()) {
- LLVM_DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: "
- << J << "\n");
- return false;
- }
-
- if (Iter != PrevIter) {
- if (Count != BaseCount) {
- LLVM_DEBUG(dbgs()
- << "LRR: Iteration " << PrevIter << " reduction use count "
- << Count << " is not equal to the base use count "
- << BaseCount << "\n");
- return false;
- }
-
- Count = 0;
- }
-
- ++Count;
- if (Iter == 0)
- ++BaseCount;
-
- PrevIter = Iter;
- }
- }
-
- return true;
-}
-
-// For all selected reductions, remove all parts except those in the first
-// iteration (and the PHI). Replace outside uses of the reduced value with uses
-// of the first-iteration reduced value (in other words, reroll the selected
-// reductions).
-void LoopReroll::ReductionTracker::replaceSelected() {
- // Fixup reductions to refer to the last instruction associated with the
- // first iteration (not the last).
- for (int i : Reds) {
- int j = 0;
- for (int e = PossibleReds[i].size(); j != e; ++j)
- if (PossibleRedIter[PossibleReds[i][j]] != 0) {
- --j;
- break;
- }
-
- // Replace users with the new end-of-chain value.
- SmallInstructionVector Users;
- for (User *U : PossibleReds[i].getReducedValue()->users()) {
- Users.push_back(cast<Instruction>(U));
- }
-
- for (Instruction *User : Users)
- User->replaceUsesOfWith(PossibleReds[i].getReducedValue(),
- PossibleReds[i][j]);
- }
-}
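A source-level sketch of what replaceSelected achieves for a selected reduction (illustrative only, not output of the pass): the non-base links of the chain are removed and users outside the loop see the first iteration's reduced value, which the rerolled loop now computes on every trip.

```cpp
// Unrolled-by-2 sum: the reduction chain is s += a[i]; s += a[i+1].
int sum_unrolled(const int *a, int n) { // assumes n is even
  int s = 0;
  for (int i = 0; i < n; i += 2) {
    s += a[i];
    s += a[i + 1];
  }
  return s;
}

// After rerolling, only the first link of the chain remains per iteration.
int sum_rerolled(const int *a, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += a[i];
  return s;
}
```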
-
-// Reroll the provided loop with respect to the provided induction variable.
-// Generally, we're looking for a loop like this:
-//
-// %iv = phi [ (preheader, ...), (body, %iv.next) ]
-// f(%iv)
-// %iv.1 = add %iv, 1 <-- a root increment
-// f(%iv.1)
-// %iv.2 = add %iv, 2 <-- a root increment
-// f(%iv.2)
-// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment
-// f(%iv.scale_m_1)
-// ...
-// %iv.next = add %iv, scale
-// %cmp = icmp(%iv, ...)
-// br %cmp, header, exit
-//
-// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
-// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
-// be intermixed with each other. The restriction imposed by this algorithm is
-// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
-// etc. be the same.
-//
-// First, we collect the use set of %iv, excluding the other increment roots.
-// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
-// times, having collected the use set of f(%iv.(i+1)), during which we:
-// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
-// the next unmatched instruction in f(%iv.(i+1)).
-// - Ensure that both matched instructions don't have any external users
-// (with the exception of last-in-chain reduction instructions).
-// - Track the (aliasing) write set, and other side effects, of all
-// instructions that belong to future iterations that come before the matched
-// instructions. If the matched instructions read from that write set, then
-// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
-// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
-// if any of these future instructions had side effects (could not be
-//       speculatively executed), and so do the matched instructions, then we
-//       cannot reorder those side-effect-producing instructions, and rerolling
-// fails.
-//
-// Finally, we make sure that all loop instructions are either loop increment
-// roots, belong to simple latch code, parts of validated reductions, part of
-// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions
-// have been validated), then we reroll the loop.
-bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
- const SCEV *BackedgeTakenCount,
- ReductionTracker &Reductions) {
- DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
- IVToIncMap, LoopControlIVs);
-
- if (!DAGRoots.findRoots())
- return false;
- LLVM_DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV
- << "\n");
-
- if (!DAGRoots.validate(Reductions))
- return false;
- if (!Reductions.validateSelected())
- return false;
- // At this point, we've validated the rerolling, and we're committed to
- // making changes!
-
- Reductions.replaceSelected();
- DAGRoots.replace(BackedgeTakenCount);
-
- ++NumRerolledLoops;
- return true;
-}
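To make the IR pattern sketched in the comment above concrete at the source level (a hypothetical before/after pair mirroring the f(%iv)/f(%iv.1) structure, not a specific test case):

```cpp
// Before: the body is f(i) followed by f(i+1), and the IV steps by 2
// (the scale); x[i+1]'s address uses the root increment %iv.1.
void before(int *x, int n) { // assumes n is even
  for (int i = 0; i < n; i += 2) {
    x[i] = i;         // f(%iv)
    x[i + 1] = i + 1; // f(%iv.1)
  }
}

// After rerolling: one copy of the body, the IV steps by 1, and the
// trip count is scaled up accordingly.
void after(int *x, int n) {
  for (int i = 0; i < n; ++i)
    x[i] = i;
}
```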
-
-bool LoopReroll::runOnLoop(Loop *L) {
- BasicBlock *Header = L->getHeader();
- LLVM_DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << "] Loop %"
- << Header->getName() << " (" << L->getNumBlocks()
- << " block(s))\n");
-
- // For now, we'll handle only single BB loops.
- if (L->getNumBlocks() > 1)
- return false;
-
- if (!SE->hasLoopInvariantBackedgeTakenCount(L))
- return false;
-
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n");
- LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount
- << "\n");
-
- // First, we need to find the induction variable with respect to which we can
- // reroll (there may be several possible options).
- SmallInstructionVector PossibleIVs;
- IVToIncMap.clear();
- LoopControlIVs.clear();
- collectPossibleIVs(L, PossibleIVs);
-
- if (PossibleIVs.empty()) {
- LLVM_DEBUG(dbgs() << "LRR: No possible IVs found\n");
- return false;
- }
-
- ReductionTracker Reductions;
- collectPossibleReductions(L, Reductions);
- bool Changed = false;
-
- // For each possible IV, collect the associated possible set of 'root' nodes
- // (i+1, i+2, etc.).
- for (Instruction *PossibleIV : PossibleIVs)
- if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) {
- Changed = true;
- break;
- }
- LLVM_DEBUG(dbgs() << "\n After Reroll:\n" << *(L->getHeader()) << "\n");
-
- // Trip count of L has changed so SE must be re-evaluated.
- if (Changed)
- SE->forgetLoop(L);
-
- return Changed;
-}
-
-PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &U) {
- return LoopReroll(&AR.AA, &AR.LI, &AR.SE, &AR.TLI, &AR.DT, true).runOnLoop(&L)
- ? getLoopPassPreservedAnalyses()
- : PreservedAnalyses::all();
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index eee855058706..acb79e94d087 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -64,11 +64,12 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
// Vectorization requires loop-rotation. Use default threshold for loops the
// user explicitly marked for vectorization, even when header duplication is
// disabled.
- int Threshold = EnableHeaderDuplication ||
- hasVectorizeTransformation(&L) == TM_ForcedByUser
- ? DefaultRotationThreshold
- : 0;
- const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ int Threshold =
+ (EnableHeaderDuplication && !L.getHeader()->getParent()->hasMinSize()) ||
+ hasVectorizeTransformation(&L) == TM_ForcedByUser
+ ? DefaultRotationThreshold
+ : 0;
+ const DataLayout &DL = L.getHeader()->getDataLayout();
const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
std::optional<MemorySSAUpdater> MSSAU;
@@ -89,79 +90,3 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-namespace {
-
-class LoopRotateLegacyPass : public LoopPass {
- unsigned MaxHeaderSize;
- bool PrepareForLTO;
-
-public:
- static char ID; // Pass ID, replacement for typeid
- LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1,
- bool PrepareForLTO = false)
- : LoopPass(ID), PrepareForLTO(PrepareForLTO) {
- initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry());
- if (SpecifiedMaxHeaderSize == -1)
- MaxHeaderSize = DefaultRotationThreshold;
- else
- MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
- }
-
- // LCSSA form makes instruction renaming easier.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- getLoopAnalysisUsage(AU);
-
- // Lazy BFI and BPI are marked as preserved here so LoopRotate
- // can remain part of the same loop pass manager as LICM.
- AU.addPreserved<LazyBlockFrequencyInfoPass>();
- AU.addPreserved<LazyBranchProbabilityInfoPass>();
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
- Function &F = *L->getHeader()->getParent();
-
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
- std::optional<MemorySSAUpdater> MSSAU;
- // Not requiring MemorySSA and getting it only if available will split
- // the loop pass pipeline when LoopRotate is being run first.
- auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAA)
- MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
- // Vectorization requires loop-rotation. Use default threshold for loops the
- // user explicitly marked for vectorization, even when header duplication is
- // disabled.
- int Threshold = hasVectorizeTransformation(L) == TM_ForcedByUser
- ? DefaultRotationThreshold
- : MaxHeaderSize;
-
- return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU ? &*MSSAU : nullptr, SQ,
- false, Threshold, false,
- PrepareForLTO || PrepareForLTOOption);
- }
-};
-} // end namespace
-
-char LoopRotateLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false,
- false)
-
-Pass *llvm::createLoopRotatePass(int MaxHeaderSize, bool PrepareForLTO) {
- return new LoopRotateLegacyPass(MaxHeaderSize, PrepareForLTO);
-}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 028a487ecdbc..ae9103d0608a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -16,7 +16,6 @@
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 7ebc5da8b25a..11f9f7822a15 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -193,10 +193,18 @@ static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
"lsr-term-fold", cl::Hidden,
cl::desc("Attempt to replace primary IV with other IV."));
-static cl::opt<bool> AllowDropSolutionIfLessProfitable(
- "lsr-drop-solution", cl::Hidden, cl::init(false),
+static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
+ "lsr-drop-solution", cl::Hidden,
cl::desc("Attempt to drop solution if it is less profitable"));
+static cl::opt<bool> EnableVScaleImmediates(
+ "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),
+ cl::desc("Enable analysis of vscale-relative immediates in LSR"));
+
+static cl::opt<bool> DropScaledForVScale(
+ "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
+ cl::desc("Avoid using scaled registers with vscale-relative addressing"));
+
STATISTIC(NumTermFold,
"Number of terminating condition fold recognized and performed");
@@ -247,6 +255,126 @@ public:
void dump() const;
};
+// An offset from an address that is either scalable or fixed. Used for
+// per-target optimizations of addressing modes.
+class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
+ constexpr Immediate(ScalarTy MinVal, bool Scalable)
+ : FixedOrScalableQuantity(MinVal, Scalable) {}
+
+ constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
+ : FixedOrScalableQuantity(V) {}
+
+public:
+ constexpr Immediate() = delete;
+
+ static constexpr Immediate getFixed(ScalarTy MinVal) {
+ return {MinVal, false};
+ }
+ static constexpr Immediate getScalable(ScalarTy MinVal) {
+ return {MinVal, true};
+ }
+ static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
+ return {MinVal, Scalable};
+ }
+ static constexpr Immediate getZero() { return {0, false}; }
+ static constexpr Immediate getFixedMin() {
+ return {std::numeric_limits<int64_t>::min(), false};
+ }
+ static constexpr Immediate getFixedMax() {
+ return {std::numeric_limits<int64_t>::max(), false};
+ }
+ static constexpr Immediate getScalableMin() {
+ return {std::numeric_limits<int64_t>::min(), true};
+ }
+ static constexpr Immediate getScalableMax() {
+ return {std::numeric_limits<int64_t>::max(), true};
+ }
+
+ constexpr bool isLessThanZero() const { return Quantity < 0; }
+
+ constexpr bool isGreaterThanZero() const { return Quantity > 0; }
+
+ constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
+ return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
+ }
+
+ constexpr bool isMin() const {
+ return Quantity == std::numeric_limits<ScalarTy>::min();
+ }
+
+ constexpr bool isMax() const {
+ return Quantity == std::numeric_limits<ScalarTy>::max();
+ }
+
+ // Arithmetic 'operators' that cast to unsigned types first.
+ constexpr Immediate addUnsigned(const Immediate &RHS) const {
+ assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
+ ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
+ return {Value, Scalable || RHS.isScalable()};
+ }
+
+ constexpr Immediate subUnsigned(const Immediate &RHS) const {
+ assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
+ ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
+ return {Value, Scalable || RHS.isScalable()};
+ }
+
+ // Scale the quantity by a constant without caring about runtime scalability.
+ constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
+ ScalarTy Value = (uint64_t)Quantity * RHS;
+ return {Value, Scalable};
+ }
+
+ // Helpers for generating SCEVs with vscale terms where needed.
+ const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
+ const SCEV *S = SE.getConstant(Ty, Quantity);
+ if (Scalable)
+ S = SE.getMulExpr(S, SE.getVScale(S->getType()));
+ return S;
+ }
+
+ const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
+ const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
+ if (Scalable)
+ NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
+ return NegS;
+ }
+
+ const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
+ const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
+ if (Scalable)
+ SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
+ return SU;
+ }
+};
+
+// This is needed for the Compare type of std::map when Immediate is used
+// as a key. We don't need it to be fully correct against any value of vscale,
+// just to make sure that vscale-related terms in the map are considered against
+// each other rather than being mixed up and potentially missing opportunities.
+struct KeyOrderTargetImmediate {
+ bool operator()(const Immediate &LHS, const Immediate &RHS) const {
+ if (LHS.isScalable() && !RHS.isScalable())
+ return false;
+ if (!LHS.isScalable() && RHS.isScalable())
+ return true;
+ return LHS.getKnownMinValue() < RHS.getKnownMinValue();
+ }
+};
+
+// This would be nicer if we could be generic instead of directly using size_t,
+// but there doesn't seem to be a type trait for is_orderable or
+// is_lessthan_comparable or similar.
+struct KeyOrderSizeTAndImmediate {
+ bool operator()(const std::pair<size_t, Immediate> &LHS,
+ const std::pair<size_t, Immediate> &RHS) const {
+ size_t LSize = LHS.first;
+ size_t RSize = RHS.first;
+ if (LSize != RSize)
+ return LSize < RSize;
+ return KeyOrderTargetImmediate()(LHS.second, RHS.second);
+ }
+};
} // end anonymous namespace
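A standalone sketch of the ordering KeyOrderTargetImmediate imposes, using a plain pair in place of the Immediate class (the names and types below are illustrative assumptions, not part of the patch): fixed offsets sort before scalable (vscale-relative) ones, and within each group by known-min value, so vscale terms are only ever compared against each other.

```cpp
#include <cassert>
#include <set>
#include <utility>

// (known-min value, is-scalable) stands in for the Immediate class above.
using Imm = std::pair<long, bool>;

struct KeyOrder {
  bool operator()(const Imm &L, const Imm &R) const {
    if (L.second != R.second)
      return !L.second; // fixed (false) sorts before scalable (true)
    return L.first < R.first;
  }
};

int main() {
  std::set<Imm, KeyOrder> S{{8, true}, {16, false}, {4, false}};
  auto It = S.begin();
  assert(*It++ == Imm(4, false));  // fixed 4 first
  assert(*It++ == Imm(16, false)); // then fixed 16
  assert(*It++ == Imm(8, true));   // 8 * vscale sorts last
  return 0;
}
```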
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -357,7 +485,7 @@ struct Formula {
GlobalValue *BaseGV = nullptr;
/// Base offset for complex addressing.
- int64_t BaseOffset = 0;
+ Immediate BaseOffset = Immediate::getZero();
/// Whether any complex addressing has a base register.
bool HasBaseReg = false;
@@ -388,7 +516,7 @@ struct Formula {
/// An additional constant offset which added near the use. This requires a
/// temporary register, but the offset itself can live in an add immediate
/// field rather than a register.
- int64_t UnfoldedOffset = 0;
+ Immediate UnfoldedOffset = Immediate::getZero();
Formula() = default;
@@ -628,7 +756,7 @@ void Formula::print(raw_ostream &OS) const {
if (!First) OS << " + "; else First = false;
BaseGV->printAsOperand(OS, /*PrintType=*/false);
}
- if (BaseOffset != 0) {
+ if (BaseOffset.isNonZero()) {
if (!First) OS << " + "; else First = false;
OS << BaseOffset;
}
@@ -652,7 +780,7 @@ void Formula::print(raw_ostream &OS) const {
OS << "<unknown>";
OS << ')';
}
- if (UnfoldedOffset != 0) {
+ if (UnfoldedOffset.isNonZero()) {
if (!First) OS << " + ";
OS << "imm(" << UnfoldedOffset << ')';
}
@@ -798,28 +926,34 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
/// If S involves the addition of a constant integer value, return that integer
/// value, and mutate S to point to a new SCEV with that value excluded.
-static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
+static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getAPInt().getSignificantBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
- return C->getValue()->getSExtValue();
+ return Immediate::getFixed(C->getValue()->getSExtValue());
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->operands());
- int64_t Result = ExtractImmediate(NewOps.front(), SE);
- if (Result != 0)
+ Immediate Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result.isNonZero())
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->operands());
- int64_t Result = ExtractImmediate(NewOps.front(), SE);
- if (Result != 0)
+ Immediate Result = ExtractImmediate(NewOps.front(), SE);
+ if (Result.isNonZero())
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
SCEV::FlagAnyWrap);
return Result;
- }
- return 0;
+ } else if (EnableVScaleImmediates)
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (isa<SCEVVScale>(M->getOperand(1))) {
+ S = SE.getConstant(M->getType(), 0);
+ return Immediate::getScalable(C->getValue()->getSExtValue());
+ }
+ return Immediate::getZero();
}
/// If S involves the addition of a GlobalValue address, return that symbol, and
@@ -1134,7 +1268,7 @@ struct LSRFixup {
/// A constant offset to be added to the LSRUse expression. This allows
/// multiple fixups to share the same LSRUse with different offsets, for
/// example in an unrolled loop.
- int64_t Offset = 0;
+ Immediate Offset = Immediate::getZero();
LSRFixup() = default;
@@ -1197,8 +1331,8 @@ public:
SmallVector<LSRFixup, 8> Fixups;
/// Keep track of the min and max offsets of the fixups.
- int64_t MinOffset = std::numeric_limits<int64_t>::max();
- int64_t MaxOffset = std::numeric_limits<int64_t>::min();
+ Immediate MinOffset = Immediate::getFixedMax();
+ Immediate MaxOffset = Immediate::getFixedMin();
/// This records whether all of the fixups using this LSRUse are outside of
/// the loop, in which case some special-case heuristics may be used.
@@ -1234,9 +1368,9 @@ public:
void pushFixup(LSRFixup &f) {
Fixups.push_back(f);
- if (f.Offset > MaxOffset)
+ if (Immediate::isKnownGT(f.Offset, MaxOffset))
MaxOffset = f.Offset;
- if (f.Offset < MinOffset)
+ if (Immediate::isKnownLT(f.Offset, MinOffset))
MinOffset = f.Offset;
}
@@ -1254,7 +1388,7 @@ public:
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset,
+ GlobalValue *BaseGV, Immediate BaseOffset,
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr);
@@ -1308,9 +1442,9 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
// If the step size matches the base offset, we could use pre-indexed
// addressing.
- if (AMK == TTI::AMK_PreIndexed) {
+ if (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed()) {
if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
- if (Step->getAPInt() == F.BaseOffset)
+ if (Step->getAPInt() == F.BaseOffset.getFixedValue())
LoopCost = 0;
} else if (AMK == TTI::AMK_PostIndexed) {
const SCEV *LoopStep = AR->getStepRecurrence(*SE);
@@ -1401,27 +1535,32 @@ void Cost::RateFormula(const Formula &F,
// allows folding 2 registers.
C.NumBaseAdds +=
NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
- C.NumBaseAdds += (F.UnfoldedOffset != 0);
+ C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
// Accumulate non-free scaling amounts.
C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
// Tally up the non-zero immediates.
for (const LSRFixup &Fixup : LU.Fixups) {
- int64_t O = Fixup.Offset;
- int64_t Offset = (uint64_t)O + F.BaseOffset;
- if (F.BaseGV)
- C.ImmCost += 64; // Handle symbolic values conservatively.
- // TODO: This should probably be the pointer size.
- else if (Offset != 0)
- C.ImmCost += APInt(64, Offset, true).getSignificantBits();
-
- // Check with target if this offset with this instruction is
- // specifically not supported.
- if (LU.Kind == LSRUse::Address && Offset != 0 &&
- !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
- Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
- C.NumBaseAdds++;
+ if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
+ Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
+ if (F.BaseGV)
+ C.ImmCost += 64; // Handle symbolic values conservatively.
+ // TODO: This should probably be the pointer size.
+ else if (Offset.isNonZero())
+ C.ImmCost +=
+ APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
+
+ // Check with target if this offset with this instruction is
+ // specifically not supported.
+ if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
+ !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+ Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
+ C.NumBaseAdds++;
+ } else {
+ // Incompatible immediate type, increase cost to avoid using it.
+ C.ImmCost += 2048;
+ }
}
// If we don't count instruction cost exit here.
@@ -1546,7 +1685,7 @@ void LSRFixup::print(raw_ostream &OS) const {
PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
}
- if (Offset != 0)
+ if (Offset.isNonZero())
OS << ", Offset=" << Offset;
}
@@ -1673,14 +1812,19 @@ LLVM_DUMP_METHOD void LSRUse::dump() const {
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset,
+ GlobalValue *BaseGV, Immediate BaseOffset,
bool HasBaseReg, int64_t Scale,
- Instruction *Fixup/*= nullptr*/) {
+ Instruction *Fixup /* = nullptr */) {
switch (Kind) {
- case LSRUse::Address:
- return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
- HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
-
+ case LSRUse::Address: {
+ int64_t FixedOffset =
+ BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
+ int64_t ScalableOffset =
+ BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
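// Illustrative decomposition: an Immediate representing 16 * vscale is
// queried as FixedOffset = 0 / ScalableOffset = 16, whereas a plain fixed
// offset of 16 becomes FixedOffset = 16 / ScalableOffset = 0.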
+ return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
+ HasBaseReg, Scale, AccessTy.AddrSpace,
+ Fixup, ScalableOffset);
+ }
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
@@ -1688,7 +1832,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
- if (Scale != 0 && HasBaseReg && BaseOffset != 0)
+ if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
@@ -1698,7 +1842,12 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
- if (BaseOffset != 0) {
+ if (BaseOffset.isNonZero()) {
+ // We don't have an interface to query whether the target supports
+ // icmpzero against scalable quantities yet.
+ if (BaseOffset.isScalable())
+ return false;
+
// We have one of:
// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
@@ -1706,8 +1855,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
if (Scale == 0)
// The cast does the right thing with
// std::numeric_limits<int64_t>::min().
- BaseOffset = -(uint64_t)BaseOffset;
- return TTI.isLegalICmpImmediate(BaseOffset);
+ BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
+ return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1715,30 +1864,35 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
case LSRUse::Basic:
// Only handle single-register values.
- return !BaseGV && Scale == 0 && BaseOffset == 0;
+ return !BaseGV && Scale == 0 && BaseOffset.isZero();
case LSRUse::Special:
// Special case Basic to handle -1 scales.
- return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
+ return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
}
llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
- int64_t MinOffset, int64_t MaxOffset,
+ Immediate MinOffset, Immediate MaxOffset,
LSRUse::KindType Kind, MemAccessTy AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset,
+ GlobalValue *BaseGV, Immediate BaseOffset,
bool HasBaseReg, int64_t Scale) {
+ if (BaseOffset.isNonZero() &&
+ (BaseOffset.isScalable() != MinOffset.isScalable() ||
+ BaseOffset.isScalable() != MaxOffset.isScalable()))
+ return false;
// Check for overflow.
- if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
- (MinOffset > 0))
+ int64_t Base = BaseOffset.getKnownMinValue();
+ int64_t Min = MinOffset.getKnownMinValue();
+ int64_t Max = MaxOffset.getKnownMinValue();
+ if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))
return false;
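// Worked example of the overflow test: Base = INT64_MAX and Min = 1 wrap
// around to INT64_MIN, which is not greater than Base even though Min > 0,
// so the mismatch reports overflow and the fold is rejected.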
- MinOffset = (uint64_t)BaseOffset + MinOffset;
- if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
- (MaxOffset > 0))
+ MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
+ if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))
return false;
- MaxOffset = (uint64_t)BaseOffset + MaxOffset;
+ MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
HasBaseReg, Scale) &&
@@ -1747,7 +1901,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
- int64_t MinOffset, int64_t MaxOffset,
+ Immediate MinOffset, Immediate MaxOffset,
LSRUse::KindType Kind, MemAccessTy AccessTy,
const Formula &F, const Loop &L) {
// For the purpose of isAMCompletelyFolded either having a canonical formula
@@ -1763,10 +1917,10 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
}
/// Test whether we know how to expand the current formula.
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind,
+static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
+ Immediate MaxOffset, LSRUse::KindType Kind,
MemAccessTy AccessTy, GlobalValue *BaseGV,
- int64_t BaseOffset, bool HasBaseReg, int64_t Scale) {
+ Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
// We know how to expand completely foldable formulae.
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale) ||
@@ -1777,13 +1931,21 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
BaseGV, BaseOffset, true, 0));
}
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind,
+static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
+ Immediate MaxOffset, LSRUse::KindType Kind,
MemAccessTy AccessTy, const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
+static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
+ Immediate Offset) {
+ if (Offset.isScalable())
+ return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());
+
+ return TTI.isLegalAddImmediate(Offset.getFixedValue());
+}
+
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
// Target may want to look at the user instructions.
@@ -1816,12 +1978,20 @@ static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
switch (LU.Kind) {
case LSRUse::Address: {
// Check the scaling factor cost with both the min and max offsets.
+ int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
+ if (F.BaseOffset.isScalable()) {
+ ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
+ ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
+ } else {
+ FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
+ FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
+ }
InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
- LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg,
- F.Scale, LU.AccessTy.AddrSpace);
+ LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),
+ F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
- LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg,
- F.Scale, LU.AccessTy.AddrSpace);
+ LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),
+ F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
"Legal addressing mode has an illegal cost!");
@@ -1840,10 +2010,11 @@ static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset,
+ GlobalValue *BaseGV, Immediate BaseOffset,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
- if (BaseOffset == 0 && !BaseGV) return true;
+ if (BaseOffset.isZero() && !BaseGV)
+ return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
@@ -1856,13 +2027,22 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
HasBaseReg = true;
}
+ // FIXME: Try with + without a scale? Maybe based on TTI?
+ // I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
+ // default for many architectures, not just AArch64 SVE. More investigation
+ // needed later to determine if this should be used more widely than just
+ // on scalable types.
+ if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
+ AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
+ Scale = 0;
+
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
- ScalarEvolution &SE, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind,
+ ScalarEvolution &SE, Immediate MinOffset,
+ Immediate MaxOffset, LSRUse::KindType Kind,
MemAccessTy AccessTy, const SCEV *S,
bool HasBaseReg) {
// Fast-path: zero is always foldable.
@@ -1870,14 +2050,18 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
// Conservatively, create an address with an immediate and a
// base and a scale.
- int64_t BaseOffset = ExtractImmediate(S, SE);
+ Immediate BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
- if (BaseOffset == 0 && !BaseGV) return true;
+ if (BaseOffset.isZero() && !BaseGV)
+ return true;
+
+ if (BaseOffset.isScalable())
+ return false;
// Conservatively, create an address with an immediate and a
// base and a scale.
@@ -2026,11 +2210,11 @@ class LSRInstance {
using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
UseMapTy UseMap;
- bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
+ bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, MemAccessTy AccessTy);
- std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
- MemAccessTy AccessTy);
+ std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
+ MemAccessTy AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
@@ -2056,7 +2240,7 @@ class LSRInstance {
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
- const SmallVectorImpl<int64_t> &Worklist,
+ const SmallVectorImpl<Immediate> &Worklist,
size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -2215,17 +2399,20 @@ void LSRInstance::OptimizeShadowIV() {
// Ignore negative constants, as the code below doesn't handle them
// correctly. TODO: Remove this restriction.
- if (!C->getValue().isStrictlyPositive()) continue;
+ if (!C->getValue().isStrictlyPositive())
+ continue;
/* Add new PHINode. */
- PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
+ PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());
+ NewPH->setDebugLoc(PH->getDebugLoc());
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
- BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
- Instruction::FAdd : Instruction::FSub,
- NewPH, CFP, "IV.S.next.", Incr);
+ BinaryOperator *NewIncr = BinaryOperator::Create(
+ Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
+ : Instruction::FSub,
+ NewPH, CFP, "IV.S.next.", Incr->getIterator());
+ NewIncr->setDebugLoc(Incr->getDebugLoc());
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
@@ -2395,8 +2582,8 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
- ICmpInst *NewCond =
- new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
+ ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
+ Cond->getOperand(0), NewRHS, "scmp");
// Delete the max calculation instructions.
NewCond->setDebugLoc(Cond->getDebugLoc());
@@ -2563,11 +2750,11 @@ LSRInstance::OptimizeLoopTermCond() {
/// Determine if the given use can accommodate a fixup at the given offset and
/// other details. If so, update the use and return true.
-bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
bool HasBaseReg, LSRUse::KindType Kind,
MemAccessTy AccessTy) {
- int64_t NewMinOffset = LU.MinOffset;
- int64_t NewMaxOffset = LU.MaxOffset;
+ Immediate NewMinOffset = LU.MinOffset;
+ Immediate NewMaxOffset = LU.MaxOffset;
MemAccessTy NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
@@ -2587,18 +2774,25 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
}
// Conservatively assume HasBaseReg is true for now.
- if (NewOffset < LU.MinOffset) {
+ if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
- } else if (NewOffset > LU.MaxOffset) {
+ } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
+ // FIXME: We should be able to handle some level of scalable offset support
+ // for 'void', but in order to get basic support up and running this is
+ // being left out.
+ if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
+ (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
+ return false;
+
// Update the use.
LU.MinOffset = NewMinOffset;
LU.MaxOffset = NewMaxOffset;
@@ -2609,17 +2803,17 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
/// Return an LSRUse index and an offset value for a fixup which needs the given
/// expression, with the given kind and optional access type. Either reuse an
/// existing use or create a new one, as needed.
-std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
- LSRUse::KindType Kind,
- MemAccessTy AccessTy) {
+std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
+ LSRUse::KindType Kind,
+ MemAccessTy AccessTy) {
const SCEV *Copy = Expr;
- int64_t Offset = ExtractImmediate(Expr, SE);
+ Immediate Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
- Offset = 0;
+ Offset = Immediate::getFixed(0);
}
std::pair<UseMapTy::iterator, bool> P =
@@ -2680,7 +2874,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
F.BaseGV == OrigF.BaseGV &&
F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
- if (F.BaseOffset == 0)
+ if (F.BaseOffset.isZero())
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
@@ -3162,14 +3356,27 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
- if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
- return false;
+ Immediate IncOffset = Immediate::getZero();
+ if (IncConst) {
+ if (IncConst->getAPInt().getSignificantBits() > 64)
+ return false;
+ IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
+ } else {
+ // Look for mul(vscale, constant), to detect a scalable offset.
+ auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
+ if (!IncVScale || IncVScale->getNumOperands() != 2 ||
+ !isa<SCEVVScale>(IncVScale->getOperand(1)))
+ return false;
+ auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
+ if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
+ return false;
+ IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
+ }
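// For instance, an increment of (16 * vscale), as produced when stepping a
// pointer by one <vscale x 16 x i8> vector per iteration, is recognized here
// and recorded as Immediate::getScalable(16).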
- if (IncConst->getAPInt().getSignificantBits() > 64)
+ if (!isAddressUse(TTI, UserInst, Operand))
return false;
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
- int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
IncOffset, /*HasBaseReg=*/false))
return false;
@@ -3217,6 +3424,10 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain,
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = nullptr;
+ const SCEV *Accum = SE.getZero(IntTy);
+ SmallVector<std::pair<const SCEV *, Value *>> Bases;
+ Bases.emplace_back(Accum, IVSrc);
+
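// The idea, roughly: Accum tracks the running sum of all increments seen so
// far, and Bases records earlier (offset, IV value) pairs. If the difference
// between Accum and one of those bases folds into the user's addressing mode,
// that base plus a small remainder is reused instead of materializing the
// whole left-over expression.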
for (const IVInc &Inc : Chain) {
Instruction *InsertPt = Inc.UserInst;
if (isa<PHINode>(InsertPt))
@@ -3229,10 +3440,31 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain,
// IncExpr was the result of subtraction of two narrow values, so must
// be signed.
const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
+ Accum = SE.getAddExpr(Accum, IncExpr);
LeftOverExpr = LeftOverExpr ?
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
}
- if (LeftOverExpr && !LeftOverExpr->isZero()) {
+
+ // Look through each base to see if any can produce a nice addressing mode.
+ bool FoundBase = false;
+ for (auto [MapScev, MapIVOper] : reverse(Bases)) {
+ const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
+ if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) {
+ if (!Remainder->isZero()) {
+ Rewriter.clearPostInc();
+ Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
+ const SCEV *IVOperExpr =
+ SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV));
+ IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
+ } else {
+ IVOper = MapIVOper;
+ }
+
+ FoundBase = true;
+ break;
+ }
+ }
+ if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
// Expand the IV increment.
Rewriter.clearPostInc();
Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
@@ -3243,6 +3475,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
+ Bases.emplace_back(Accum, IVOper);
IVSrc = IVOper;
LeftOverExpr = nullptr;
}
@@ -3377,9 +3610,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
}
// Get or create an LSRUse.
- std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
+ std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
size_t LUIdx = P.first;
- int64_t Offset = P.second;
+ Immediate Offset = P.second;
LSRUse &LU = Uses[LUIdx];
// Record the fixup.
@@ -3569,10 +3802,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
continue;
}
- std::pair<size_t, int64_t> P = getUse(
- S, LSRUse::Basic, MemAccessTy());
+ std::pair<size_t, Immediate> P =
+ getUse(S, LSRUse::Basic, MemAccessTy());
size_t LUIdx = P.first;
- int64_t Offset = P.second;
+ Immediate Offset = P.second;
LSRUse &LU = Uses[LUIdx];
LSRFixup &LF = LU.getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
@@ -3728,13 +3961,17 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
continue;
Formula F = Base;
+ if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
+ continue;
+
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset =
- (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+ Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
+ InnerSumSC->getValue()->getZExtValue());
if (IsScaledReg)
F.ScaledReg = nullptr;
else
@@ -3747,10 +3984,11 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset =
- (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+ Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
+ SC->getValue()->getZExtValue());
else
F.BaseRegs.push_back(*J);
// We may have changed the number of register in base regs, adjust the
@@ -3791,7 +4029,8 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
if (Base.BaseRegs.size() + (Base.Scale == 1) +
- (Base.UnfoldedOffset != 0) <= 1)
+ (Base.UnfoldedOffset.isNonZero()) <=
+ 1)
return;
// Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
@@ -3840,11 +4079,11 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// If we have an unfolded offset, generate a formula combining it with the
// registers collected.
- if (NewBase.UnfoldedOffset) {
+ if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
assert(CombinedIntegerType && "Missing a type for the unfolded offset");
- Ops.push_back(SE.getConstant(CombinedIntegerType, NewBase.UnfoldedOffset,
- true));
- NewBase.UnfoldedOffset = 0;
+ Ops.push_back(SE.getConstant(CombinedIntegerType,
+ NewBase.UnfoldedOffset.getFixedValue(), true));
+ NewBase.UnfoldedOffset = Immediate::getFixed(0);
GenerateFormula(SE.getAddExpr(Ops));
}
}
@@ -3884,15 +4123,18 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
/// Helper function for LSRInstance::GenerateConstantOffsets.
void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
- const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+ const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
- auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
+ auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
+ if (!Base.BaseOffset.isCompatibleImmediate(Offset))
+ return;
+ F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
+ const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
+ const SCEV *NewG = SE.getAddExpr(NewOffset, G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
if (IsScaledReg) {
@@ -3928,21 +4170,24 @@ void LSRInstance::GenerateConstantOffsetsImpl(
int64_t Step = StepInt.isNegative() ?
StepInt.getSExtValue() : StepInt.getZExtValue();
- for (int64_t Offset : Worklist) {
- Offset -= Step;
- GenerateOffset(G, Offset);
+ for (Immediate Offset : Worklist) {
+ if (Offset.isFixed()) {
+ Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
+ GenerateOffset(G, Offset);
+ }
}
}
}
}
- for (int64_t Offset : Worklist)
+ for (Immediate Offset : Worklist)
GenerateOffset(G, Offset);
- int64_t Imm = ExtractImmediate(G, SE);
- if (G->isZero() || Imm == 0)
+ Immediate Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm.isZero() ||
+ !Base.BaseOffset.isCompatibleImmediate(Imm))
return;
Formula F = Base;
- F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
return;
if (IsScaledReg) {
@@ -3961,7 +4206,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// TODO: For now, just add the min and max offset, because it usually isn't
// worthwhile looking at everything in between.
- SmallVector<int64_t, 2> Worklist;
+ SmallVector<Immediate, 2> Worklist;
Worklist.push_back(LU.MinOffset);
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
@@ -4001,27 +4246,31 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
if (!ConstantInt::isValueValidForType(IntTy, Factor))
continue;
// Check that the multiplication doesn't overflow.
- if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
+ if (Base.BaseOffset.isMin() && Factor == -1)
+ continue;
+ // Not supporting scalable immediates.
+ if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
continue;
- int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
+ Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
assert(Factor != 0 && "Zero factor not expected!");
- if (NewBaseOffset / Factor != Base.BaseOffset)
+ if (NewBaseOffset.getFixedValue() / Factor !=
+ Base.BaseOffset.getFixedValue())
continue;
// If the offset will be truncated at this use, check that it is in bounds.
if (!IntTy->isPointerTy() &&
- !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
+ !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))
continue;
// Check that multiplying with the use offset doesn't overflow.
- int64_t Offset = LU.MinOffset;
- if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
+ Immediate Offset = LU.MinOffset;
+ if (Offset.isMin() && Factor == -1)
continue;
- Offset = (uint64_t)Offset * Factor;
- if (Offset / Factor != LU.MinOffset)
+ Offset = Offset.mulUnsigned(Factor);
+ if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
continue;
// If the offset will be truncated at this use, check that it is in bounds.
if (!IntTy->isPointerTy() &&
- !ConstantInt::isValueValidForType(IntTy, Offset))
+ !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))
continue;
Formula F = Base;
@@ -4032,7 +4281,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
continue;
// Compensate for the use having MinOffset built into it.
- F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
+ F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
@@ -4051,16 +4300,16 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
}
// Check that multiplying with the unfolded offset doesn't overflow.
- if (F.UnfoldedOffset != 0) {
- if (F.UnfoldedOffset == std::numeric_limits<int64_t>::min() &&
- Factor == -1)
+ if (F.UnfoldedOffset.isNonZero()) {
+ if (F.UnfoldedOffset.isMin() && Factor == -1)
continue;
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
- if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
+ F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
+ if (F.UnfoldedOffset.getFixedValue() / Factor !=
+ Base.UnfoldedOffset.getFixedValue())
continue;
// If the offset will be truncated, check that it is in bounds.
- if (!IntTy->isPointerTy() &&
- !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
+ if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
+ IntTy, F.UnfoldedOffset.getFixedValue()))
continue;
}
@@ -4103,8 +4352,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
}
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
- if (LU.Kind == LSRUse::ICmpZero &&
- !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
+ if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
+ Base.BaseOffset.isZero() && !Base.BaseGV)
continue;
// For each addrec base reg, if its loop is current loop, apply the scale.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
@@ -4230,10 +4479,10 @@ namespace {
/// structures moving underneath it.
struct WorkItem {
size_t LUIdx;
- int64_t Imm;
+ Immediate Imm;
const SCEV *OrigReg;
- WorkItem(size_t LI, int64_t I, const SCEV *R)
+ WorkItem(size_t LI, Immediate I, const SCEV *R)
: LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
@@ -4257,14 +4506,14 @@ LLVM_DUMP_METHOD void WorkItem::dump() const {
/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
- using ImmMapTy = std::map<int64_t, const SCEV *>;
+ using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
DenseMap<const SCEV *, ImmMapTy> Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
for (const SCEV *Use : RegUses) {
const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
- int64_t Imm = ExtractImmediate(Reg, SE);
+ Immediate Imm = ExtractImmediate(Reg, SE);
auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
@@ -4276,7 +4525,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// a list of work to do and do the work in a separate step so that we're
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
- SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+ SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
+ UniqueItems;
for (const SCEV *Reg : Sequence) {
const ImmMapTy &Imms = Map.find(Reg)->second;
@@ -4295,7 +4545,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
J != JE; ++J) {
const SCEV *OrigReg = J->second;
- int64_t JImm = J->first;
+ Immediate JImm = J->first;
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
if (!isa<SCEVConstant>(OrigReg) &&
@@ -4307,22 +4557,34 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// Conservatively examine offsets between this orig reg and a few selected
// other orig regs.
- int64_t First = Imms.begin()->first;
- int64_t Last = std::prev(Imms.end())->first;
+ Immediate First = Imms.begin()->first;
+ Immediate Last = std::prev(Imms.end())->first;
+ if (!First.isCompatibleImmediate(Last)) {
+ LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
+ << "\n");
+ continue;
+ }
+ // Only scalable if both terms are scalable, or if one is scalable and
+ // the other is 0.
+ bool Scalable = First.isScalable() || Last.isScalable();
+ int64_t FI = First.getKnownMinValue();
+ int64_t LI = Last.getKnownMinValue();
// Compute (First + Last) / 2 without overflow using the fact that
// First + Last = 2 * (First & Last) + (First ^ Last).
- int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
- // If the result is negative and First is odd and Last even (or vice versa),
+ int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
+ // If the result is negative and FI is odd and LI even (or vice versa),
// we rounded towards -inf. Add 1 in that case, to round towards 0.
- Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63));
+ Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
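// Worked example: FI = 6, LI = 3 gives (6 & 3) + ((6 ^ 3) >> 1) = 2 + 2 = 4,
// matching (6 + 3) / 2 rounded towards zero, without ever forming the
// possibly overflowing sum FI + LI.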
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), std::prev(Imms.end()),
- Imms.lower_bound(Avg)};
+ Imms.lower_bound(Immediate::get(Avg, Scalable))};
for (const auto &M : OtherImms) {
if (M == J || M == JE) continue;
+ if (!JImm.isCompatibleImmediate(M->first))
+ continue;
// Compute the difference between the two.
- int64_t Imm = (uint64_t)JImm - M->first;
+ Immediate Imm = JImm.subUnsigned(M->first);
for (unsigned LUIdx : UsedByIndices.set_bits())
// Make a memo of this use, offset, and register tuple.
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
@@ -4340,11 +4602,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
for (const WorkItem &WI : WorkItems) {
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
- int64_t Imm = WI.Imm;
+ Immediate Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
- const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+ const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
// TODO: Use a more targeted data structure.
@@ -4357,10 +4619,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
F.unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
- int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
+ if (!F.BaseOffset.isCompatibleImmediate(Imm))
+ continue;
+ Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
// Don't create 50 + reg(-50).
- if (F.referencesReg(SE.getSCEV(
- ConstantInt::get(IntTy, -(uint64_t)Offset))))
+ const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
+ if (F.referencesReg(S))
continue;
Formula NewF = F;
NewF.BaseOffset = Offset;
@@ -4372,11 +4636,18 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// If the new scale is a constant in a register, and adding the constant
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) &&
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
+ // FIXME: Do we need to do something for scalable immediates here?
+ // A scalable SCEV won't be constant, but we might still have
+ // something in the offset? Bail out for now to be safe.
+ if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
+ continue;
+ if (C->getValue()->isNegative() !=
+ (NewF.BaseOffset.isLessThanZero()) &&
(C->getAPInt().abs() * APInt(BitWidth, F.Scale))
- .ule(std::abs(NewF.BaseOffset)))
+ .ule(std::abs(NewF.BaseOffset.getFixedValue())))
continue;
+ }
// OK, looks good.
NewF.canonicalize(*this->L);
@@ -4388,16 +4659,21 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
- NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
+ if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
+ !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
+ !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
+ continue;
+ NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
if (AMK == TTI::AMK_PostIndexed &&
mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
continue;
- if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
+ if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
continue;
NewF = F;
- NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
+ NewF.UnfoldedOffset = NewUnfoldedOffset;
}
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
@@ -4405,13 +4681,18 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// constant value to the immediate would produce a value closer to
// zero than the immediate itself, then the formula isn't worthwhile.
for (const SCEV *NewReg : NewF.BaseRegs)
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg))
- if ((C->getAPInt() + NewF.BaseOffset)
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
+ if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
+ goto skip_formula;
+ if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
.abs()
- .slt(std::abs(NewF.BaseOffset)) &&
- (C->getAPInt() + NewF.BaseOffset).countr_zero() >=
- (unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))
+ .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
+ (C->getAPInt() + NewF.BaseOffset.getFixedValue())
+ .countr_zero() >=
+ (unsigned)llvm::countr_zero<uint64_t>(
+ NewF.BaseOffset.getFixedValue()))
goto skip_formula;
+ }
// Ok, looks good.
NewF.canonicalize(*this->L);
@@ -4595,6 +4876,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
+ if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
+ continue;
// Look for a formula with a constant or GV in a register. If the use
// also has a formula with that same value in an immediate field,
// delete the one that uses a register.
@@ -4604,7 +4887,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
Formula NewF = F;
//FIXME: Formulas should store bitwidth to do wrapping properly.
// See PR41034.
- NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
+ NewF.BaseOffset =
+ Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
+ (uint64_t)C->getValue()->getSExtValue());
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -4660,7 +4945,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (const Formula &F : LU.Formulae) {
- if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
+ if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -5247,10 +5532,20 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
assert(Solution.size() == Uses.size() && "Malformed solution!");
+ const bool EnableDropUnprofitableSolution = [&] {
+ switch (AllowDropSolutionIfLessProfitable) {
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ case cl::BOU_UNSET:
+ return TTI.shouldDropLSRSolutionIfLessProfitable();
+ }
+ llvm_unreachable("Unhandled cl::boolOrDefault enum");
+ }();
+
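// In other words, an explicit lsr-drop-solution setting on the command line
// wins, and only when the option is left unset does the target hook decide
// whether a less profitable solution should be dropped.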
if (BaselineCost.isLess(SolutionCost)) {
- LLVM_DEBUG(dbgs() << "The baseline solution requires ";
- BaselineCost.print(dbgs()); dbgs() << "\n");
- if (!AllowDropSolutionIfLessProfitable)
+ if (!EnableDropUnprofitableSolution)
LLVM_DEBUG(
dbgs() << "Baseline is more profitable than chosen solution, "
"add option 'lsr-drop-solution' to drop LSR solution.\n");
@@ -5485,31 +5780,36 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
Ops.push_back(SE.getUnknown(FullV));
}
+ // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
+ // out at this point, or should we generate a SCEV adding together mixed
+ // offsets?
+ assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
+ "Expanding mismatched offsets\n");
// Expand the immediate portion.
- int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
- if (Offset != 0) {
+ Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
+ if (Offset.isNonZero()) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
if (!ICmpScaledV)
- ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
+ ICmpScaledV =
+ ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
- ICmpScaledV = ConstantInt::get(IntTy, Offset);
+ ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
}
} else {
// Just add the immediate values. These again are expected to be matched
// as part of the address.
- Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
+ Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
}
}
// Expand the unfolded offset portion.
- int64_t UnfoldedOffset = F.UnfoldedOffset;
- if (UnfoldedOffset != 0) {
+ Immediate UnfoldedOffset = F.UnfoldedOffset;
+ if (UnfoldedOffset.isNonZero()) {
// Just add the immediate values.
- Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
- UnfoldedOffset)));
+ Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
}
// Emit instructions summing all the operands.
@@ -5532,10 +5832,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
"a scale at the same time!");
if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
- Instruction *Cast =
- CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
- OpTy, false),
- ICmpScaledV, OpTy, "tmp", CI);
+ Instruction *Cast = CastInst::Create(
+ CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
+ ICmpScaledV, OpTy, "tmp", CI->getIterator());
ICmpScaledV = Cast;
}
CI->setOperand(1, ICmpScaledV);
@@ -5546,11 +5845,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
- -(uint64_t)Offset);
+ -(uint64_t)Offset.getFixedValue());
if (C->getType() != OpTy) {
C = ConstantFoldCastOperand(
CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
- CI->getModule()->getDataLayout());
+ CI->getDataLayout());
assert(C && "Cast of ConstantInt should have folded");
}
@@ -5635,11 +5934,10 @@ void LSRInstance::RewriteForPHI(
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
- FullV =
- CastInst::Create(CastInst::getCastOpcode(FullV, false,
- OpTy, false),
- FullV, LF.OperandValToReplace->getType(),
- "tmp", BB->getTerminator());
+ FullV = CastInst::Create(
+ CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
+ LF.OperandValToReplace->getType(), "tmp",
+ BB->getTerminator()->getIterator());
// If the incoming block for this value is not in the loop, it means the
// current PHI is not in a loop exit, so we must create a LCSSA PHI for
@@ -5657,8 +5955,8 @@ void LSRInstance::RewriteForPHI(
// formulae will not be implemented completely and some instructions
// will not be eliminated.
if (needUpdateFixups) {
- for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
- for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
+ for (LSRUse &LU : Uses)
+ for (LSRFixup &Fixup : LU.Fixups)
// If fixup is supposed to rewrite some operand in the phi
// that was just updated, it may be already moved to
// another phi node. Such fixup requires update.
@@ -5711,8 +6009,8 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
- CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
- FullV, OpTy, "tmp", LF.UserInst);
+ CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+ FullV, OpTy, "tmp", LF.UserInst->getIterator());
FullV = Cast;
}
@@ -5856,7 +6154,7 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
? PreferredAddresingMode
: TTI.getPreferredAddressingMode(L, &SE)),
- Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false),
+ Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),
BaselineCost(L, SE, TTI, AMK) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
@@ -5930,6 +6228,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
+ LLVM_DEBUG(dbgs() << "The baseline solution requires ";
+ BaselineCost.print(dbgs()); dbgs() << "\n");
// Now use the reuse data to generate a bunch of interesting ways
// to formulate the values needed for the uses.
@@ -6368,10 +6668,10 @@ struct DVIRecoveryRec {
DVIRecoveryRec(DbgValueInst *DbgValue)
: DbgRef(DbgValue), Expr(DbgValue->getExpression()),
HadLocationArgList(false) {}
- DVIRecoveryRec(DPValue *DPV)
- : DbgRef(DPV), Expr(DPV->getExpression()), HadLocationArgList(false) {}
+ DVIRecoveryRec(DbgVariableRecord *DVR)
+ : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
- PointerUnion<DbgValueInst *, DPValue *> DbgRef;
+ PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
DIExpression *Expr;
bool HadLocationArgList;
SmallVector<WeakVH, 2> LocationOps;
@@ -6467,7 +6767,7 @@ static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
if (isa<DbgValueInst *>(DVIRec.DbgRef))
UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
else
- UpdateDbgValueInstImpl(cast<DPValue *>(DVIRec.DbgRef));
+ UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}
/// Cached location ops may be erased during LSR, in which case a poison is
@@ -6513,7 +6813,7 @@ static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
if (isa<DbgValueInst *>(DVIRec.DbgRef))
RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
else
- RestorePreTransformStateImpl(cast<DPValue *>(DVIRec.DbgRef));
+ RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
@@ -6523,7 +6823,7 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
if (isa<DbgValueInst *>(DVIRec.DbgRef)
? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
- : !cast<DPValue *>(DVIRec.DbgRef)->isKillLocation())
+ : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
return false;
// LSR may have caused several changes to the dbg.value in the failed salvage
@@ -6621,7 +6921,7 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
<< *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
else
LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
- << *cast<DPValue *>(DVIRec.DbgRef) << "\n");
+ << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
return true;
}
@@ -6712,9 +7012,9 @@ static void DbgGatherSalvagableDVI(
SalvageableDVISCEVs.push_back(std::move(NewRec));
return true;
};
- for (auto &DPV : I.getDbgValueRange()) {
- if (DPV.isDbgValue() || DPV.isDbgAssign())
- ProcessDbgValue(&DPV);
+ for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
+ if (DVR.isDbgValue() || DVR.isDbgAssign())
+ ProcessDbgValue(&DVR);
}
auto DVI = dyn_cast<DbgValueInst>(&I);
if (!DVI)
@@ -6762,7 +7062,7 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
- const LoopInfo &LI) {
+ const LoopInfo &LI, const TargetTransformInfo &TTI) {
if (!L->isInnermost()) {
LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
return std::nullopt;
@@ -6808,18 +7108,35 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
return std::nullopt;
+ // Ensure the simple recurrence is a part of the current loop.
+ if (ToFold->getParent() != L->getHeader())
+ return std::nullopt;
+
// If that IV isn't dead after we rewrite the exit condition in terms of
// another IV, there's no point in doing the transform.
if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
return std::nullopt;
+ // Inserting instructions in the preheader has a runtime cost; scale
+ // the allowed cost with the loop's trip count as best we can.
+ const unsigned ExpansionBudget = [&]() {
+ unsigned Budget = 2 * SCEVCheapExpansionBudget;
+ if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
+ return std::min(Budget, SmallTC);
+ if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
+ return std::min(Budget, *SmallTC);
+ // Unknown trip count, assume long running by default.
+ return Budget;
+ }();
+
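// For a rough sense of scale, assuming SCEVCheapExpansionBudget keeps its
// default of 4: a loop whose maximum trip count is known to be 3 gets a
// budget of min(8, 3) = 3, while a loop with no usable trip-count estimate
// keeps the full budget of 8.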
const SCEV *BECount = SE.getBackedgeTakenCount(L);
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = L->getHeader()->getDataLayout();
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
PHINode *ToHelpFold = nullptr;
const SCEV *TermValueS = nullptr;
bool MustDropPoison = false;
+ auto InsertPt = L->getLoopPreheader()->getTerminator();
for (PHINode &PN : L->getHeader()->phis()) {
if (ToFold == &PN)
continue;
@@ -6861,6 +7178,14 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
continue;
}
+ if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
+ &TTI, InsertPt)) {
+ LLVM_DEBUG(
+ dbgs() << "Is too expensive to expand terminating value for phi node"
+ << PN << "\n");
+ continue;
+ }
+
// The candidate IV may have been otherwise dead and poison from the
// very first iteration. If we can't disprove that, we can't use the IV.
if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
@@ -6941,12 +7266,13 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = L->getHeader()->getDataLayout();
SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
+ Rewriter.clear();
if (numFolded) {
Changed = true;
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
@@ -6961,10 +7287,11 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// skip the updates in each loop iteration.
if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = L->getHeader()->getDataLayout();
SCEVExpander Rewriter(SE, DL, "lsr", true);
int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
UnusedIndVarInLoop, DeadInsts);
+ Rewriter.clear();
if (Rewrites) {
Changed = true;
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
@@ -6986,7 +7313,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}();
if (EnableFormTerm) {
- if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) {
+ if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
Changed = true;
@@ -7010,9 +7337,8 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
// SCEVExpander for both use in preheader and latch
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const DataLayout &DL = L->getHeader()->getDataLayout();
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
- SCEVExpanderCleaner ExpCleaner(Expander);
assert(Expander.isSafeToExpand(TermValueS) &&
"Terminating value was checked safe in canFoldTerminatingCondition");
@@ -7043,10 +7369,9 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
BI->setCondition(NewTermCond);
+ Expander.clear();
OldTermCond->eraseFromParent();
DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
-
- ExpCleaner.markResultUsed();
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 7b4c54370e48..f8e2f1f28088 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -327,8 +327,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);
if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " which cannot be duplicated or have invalid cost.\n");
+ LLVM_DEBUG(dbgs() << " Loop not considered unrollable\n");
return LoopUnrollResult::Unmodified;
}
@@ -341,7 +340,10 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
}
- if (InnerUCE.Convergent || OuterUCE.Convergent) {
+ // FIXME: The call to canUnroll() allows some controlled convergent
+ // operations, but we block them here for future changes.
+ if (InnerUCE.Convergence != ConvergenceKind::None ||
+ OuterUCE.Convergence != ConvergenceKind::None) {
LLVM_DEBUG(
dbgs() << " Not unrolling loop with convergent instructions.\n");
return LoopUnrollResult::Unmodified;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7cfeb019af97..cbc35b6dd429 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -27,6 +28,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -173,6 +175,10 @@ static cl::opt<unsigned>
cl::desc("Default threshold (max size of unrolled "
"loop), used in all but O3 optimizations"));
+static cl::opt<unsigned> PragmaUnrollFullMaxIterations(
+ "pragma-unroll-full-max-iterations", cl::init(1'000'000), cl::Hidden,
+ cl::desc("Maximum allowed iterations to unroll under pragma unroll full."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -446,7 +452,15 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
// First accumulate the cost of this instruction.
if (!Cost.IsFree) {
- UnrolledCost += TTI.getInstructionCost(I, CostKind);
+ // Consider simplified operands in instruction cost.
+ SmallVector<Value *, 4> Operands;
+ transform(I->operands(), std::back_inserter(Operands),
+ [&](Value *Op) {
+ if (auto Res = SimplifiedValues.lookup(Op))
+ return Res;
+ return Op;
+ });
+ UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
<< Iteration << "): ");
LLVM_DEBUG(I->dump());
@@ -670,11 +684,15 @@ UnrollCostEstimator::UnrollCostEstimator(
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
CodeMetrics Metrics;
for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, TTI, EphValues);
+ Metrics.analyzeBasicBlock(BB, TTI, EphValues, /* PrepareForLTO= */ false,
+ L);
NumInlineCandidates = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
- Convergent = Metrics.convergent;
+ Convergence = Metrics.Convergence;
LoopSize = Metrics.NumInsts;
+ ConvergenceAllowsRuntime =
+ Metrics.Convergence != ConvergenceKind::Uncontrolled &&
+ !getLoopConvergenceHeart(L);
// Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -687,6 +705,25 @@ UnrollCostEstimator::UnrollCostEstimator(
LoopSize = BEInsns + 1;
}
+bool UnrollCostEstimator::canUnroll() const {
+ switch (Convergence) {
+ case ConvergenceKind::ExtendedLoop:
+ LLVM_DEBUG(dbgs() << " Convergence prevents unrolling.\n");
+ return false;
+ default:
+ break;
+ }
+ if (!LoopSize.isValid()) {
+ LLVM_DEBUG(dbgs() << " Invalid loop size prevents unrolling.\n");
+ return false;
+ }
+ if (NotDuplicatable) {
+ LLVM_DEBUG(dbgs() << " Non-duplicatable blocks prevent unrolling.\n");
+ return false;
+ }
+ return true;
+}
+
uint64_t UnrollCostEstimator::getUnrolledLoopSize(
const TargetTransformInfo::UnrollingPreferences &UP,
unsigned CountOverwrite) const {
@@ -776,8 +813,17 @@ shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
return PInfo.PragmaCount;
}
- if (PInfo.PragmaFullUnroll && TripCount != 0)
+ if (PInfo.PragmaFullUnroll && TripCount != 0) {
+ // Certain cases with UBSAN can cause the trip count to be calculated as
+ // INT_MAX. Block full unrolling at a reasonable limit so that the compiler
+ // doesn't hang trying to unroll the loop. See PR77842
+ if (TripCount > PragmaUnrollFullMaxIterations) {
+ LLVM_DEBUG(dbgs() << "Won't unroll; trip count is too large\n");
+ return std::nullopt;
+ }
+
return TripCount;
+ }
if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount &&
MaxTripCount <= UP.MaxUpperBound)
@@ -1119,7 +1165,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
std::optional<bool> ProvidedUpperBound,
std::optional<bool> ProvidedAllowPeeling,
std::optional<bool> ProvidedAllowProfileBasedPeeling,
- std::optional<unsigned> ProvidedFullUnrollMaxCount) {
+ std::optional<unsigned> ProvidedFullUnrollMaxCount,
+ AAResults *AA = nullptr) {
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
@@ -1182,8 +1229,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
if (!UCE.canUnroll()) {
- LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
- << " which cannot be duplicated or have invalid cost.\n");
+ LLVM_DEBUG(dbgs() << " Loop not considered unrollable.\n");
return LoopUnrollResult::Unmodified;
}
@@ -1230,15 +1276,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// is unsafe -- it adds a control-flow dependency to the convergent
// operation. Therefore restrict remainder loop (try unrolling without).
//
- // TODO: This is quite conservative. In practice, convergent_op()
- // is likely to be called unconditionally in the loop. In this
- // case, the program would be ill-formed (on most architectures)
- // unless n were the same on all threads in a thread group.
- // Assuming n is the same on all threads, any kind of unrolling is
- // safe. But currently llvm's notion of convergence isn't powerful
- // enough to express this.
- if (UCE.Convergent)
- UP.AllowRemainder = false;
+ // TODO: This is somewhat conservative; we could allow the remainder if the
+ // trip count is uniform.
+ UP.AllowRemainder &= UCE.ConvergenceAllowsRuntime;
// Try to find the trip count upper bound if we cannot find the exact trip
// count.
@@ -1258,6 +1298,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
if (!UP.Count)
return LoopUnrollResult::Unmodified;
+ UP.Runtime &= UCE.ConvergenceAllowsRuntime;
+
if (PP.PeelCount) {
assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName()
@@ -1271,7 +1313,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
ValueToValueMapTy VMap;
if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) {
- simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI);
+ simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr);
// If the loop was peeled, we already "used up" the profile information
// we had, so we don't want to unroll or peel again.
if (PP.PeelProfiledIterations)
@@ -1282,7 +1324,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
}
// Do not attempt partial/runtime unrolling in FullLoopUnrolling
- if (OnlyFullUnroll && !(UP.Count >= MaxTripCount)) {
+ if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
LLVM_DEBUG(
dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
return LoopUnrollResult::Unmodified;
@@ -1300,11 +1342,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
// Unroll the loop.
Loop *RemainderLoop = nullptr;
+ UnrollLoopOptions ULO;
+ ULO.Count = UP.Count;
+ ULO.Force = UP.Force;
+ ULO.AllowExpensiveTripCount = UP.AllowExpensiveTripCount;
+ ULO.UnrollRemainder = UP.UnrollRemainder;
+ ULO.Runtime = UP.Runtime;
+ ULO.ForgetAllSCEV = ForgetAllSCEV;
+ ULO.Heart = getLoopConvergenceHeart(L);
LoopUnrollResult UnrollResult = UnrollLoop(
- L,
- {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UP.UnrollRemainder, ForgetAllSCEV},
- LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
+ L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
@@ -1551,6 +1598,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ AAResults &AA = AM.getResult<AAManager>(F);
LoopAnalysisManager *LAM = nullptr;
if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
@@ -1606,7 +1654,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
/*Count*/ std::nullopt,
/*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
- UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
+ UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
+ &AA);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!
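
One of the hunks above caps '#pragma unroll(full)' when the trip count is technically known but absurdly large (e.g. computed as INT_MAX under UBSan). A minimal standalone sketch of that guard follows; the constant mirrors the new -pragma-unroll-full-max-iterations default, while the function name and return convention are illustrative rather than the pass's actual interfaces.

#include <cstdint>
#include <optional>

// Mirrors the default of the -pragma-unroll-full-max-iterations option.
constexpr std::uint64_t PragmaUnrollFullMaxIterations = 1'000'000;

// Returns the count to use for '#pragma unroll(full)', or nullopt when full
// unrolling must be declined (unknown trip count, or a count so large that
// expanding it would effectively hang the compiler).
static std::optional<std::uint64_t> fullUnrollCount(std::uint64_t TripCount) {
  if (TripCount == 0)
    return std::nullopt;                      // exact trip count not known
  if (TripCount > PragmaUnrollFullMaxIterations)
    return std::nullopt;                      // fall back to other heuristics
  return TripCount;
}
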
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index f39c24484840..663715948241 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -582,7 +582,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
const Function *F = L.getHeader()->getParent();
OptimizationRemarkEmitter ORE(F);
- LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr);
+ LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr);
if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
index 6aba913005d0..b42d3b2bc09a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerAtomicPass.cpp
@@ -20,7 +20,7 @@
#include "llvm/Transforms/Utils/LowerAtomic.h"
using namespace llvm;
-#define DEBUG_TYPE "loweratomic"
+#define DEBUG_TYPE "lower-atomic"
static bool LowerFenceInst(FenceInst *FI) {
FI->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index b167120a906d..bd7895feb64a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -85,8 +85,11 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
if (Target && Target != Other) {
BasicBlock *Source = BI->getParent();
Other->removePredecessor(Source);
+
+ Instruction *NewBI = BranchInst::Create(Target, Source);
+ NewBI->setDebugLoc(BI->getDebugLoc());
BI->eraseFromParent();
- BranchInst::Create(Target, Source);
+
if (DTU)
DTU->applyUpdates({{DominatorTree::Delete, Source, Other}});
if (pred_empty(Other))
@@ -103,7 +106,7 @@ static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo &TLI,
DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool HasDeadBlocks = false;
- const auto &DL = F.getParent()->getDataLayout();
+ const auto &DL = F.getDataLayout();
SmallVector<WeakTrackingVH, 8> Worklist;
ReversePostOrderTraversal<Function *> RPOT(&F);
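
The replaceConditionalBranchesOnConstant hunk above reorders the rewrite so the new unconditional branch inherits the old branch's debug location before the old branch is erased. The sketch below isolates just that pattern, assuming BI is a conditional branch whose condition has folded and Target/Other are its kept/dropped successors; DomTree updates and the surrounding pass logic are omitted.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static void foldToUnconditionalBranch(BranchInst *BI, BasicBlock *Target,
                                      BasicBlock *Other) {
  BasicBlock *Source = BI->getParent();
  Other->removePredecessor(Source); // keep PHIs in the dead successor consistent
  // Create the replacement first so it can take over the debug location...
  Instruction *NewBI = BranchInst::Create(Target, Source);
  NewBI->setDebugLoc(BI->getDebugLoc());
  // ...and only then drop the original conditional branch.
  BI->eraseFromParent();
}
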
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 6f87e4d91d2c..17c5a4ee1fd0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -102,7 +102,7 @@ static bool handleSwitchExpect(SwitchInst &SI) {
misexpect::checkExpectAnnotations(SI, Weights, /*IsFrontend=*/true);
SI.setCondition(ArgValue);
- setBranchWeights(SI, Weights);
+ setBranchWeights(SI, Weights, /*IsExpected=*/true);
return true;
}
@@ -262,11 +262,13 @@ static void handlePhiDef(CallInst *Expect) {
if (IsOpndComingFromSuccessor(BI->getSuccessor(1)))
BI->setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(LikelyBranchWeightVal,
- UnlikelyBranchWeightVal));
+ UnlikelyBranchWeightVal,
+ /*IsExpected=*/true));
else if (IsOpndComingFromSuccessor(BI->getSuccessor(0)))
BI->setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(UnlikelyBranchWeightVal,
- LikelyBranchWeightVal));
+ LikelyBranchWeightVal,
+ /*IsExpected=*/true));
}
}
@@ -331,12 +333,12 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
SmallVector<uint32_t, 4> ExpectedWeights;
if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
(Predicate == CmpInst::ICMP_EQ)) {
- Node =
- MDB.createBranchWeights(LikelyBranchWeightVal, UnlikelyBranchWeightVal);
+ Node = MDB.createBranchWeights(
+ LikelyBranchWeightVal, UnlikelyBranchWeightVal, /*IsExpected=*/true);
ExpectedWeights = {LikelyBranchWeightVal, UnlikelyBranchWeightVal};
} else {
- Node =
- MDB.createBranchWeights(UnlikelyBranchWeightVal, LikelyBranchWeightVal);
+ Node = MDB.createBranchWeights(UnlikelyBranchWeightVal,
+ LikelyBranchWeightVal, /*IsExpected=*/true);
ExpectedWeights = {UnlikelyBranchWeightVal, LikelyBranchWeightVal};
}
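
For orientation, the weights being tagged above originate from source-level hints like the one below; LowerExpectIntrinsic rewrites the resulting llvm.expect call into branch_weights metadata, and the new IsExpected flag lets later consumers distinguish these frontend hints from measured profile data. Plain C++ illustration only, not part of the patch.

#include <cstdio>

int classify(int x) {
  // The frontend can lower this hint to @llvm.expect; this pass then replaces
  // the call with "likely" branch weights on the conditional branch.
  if (__builtin_expect(x == 0, 1))
    return 0;                      // expected (hot) path
  std::printf("unexpected value: %d\n", x);
  return 1;                        // unlikely (cold) path
}

int main() { return classify(0); }
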
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 72b9db1e73d7..6a681fd93397 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -19,6 +19,7 @@
#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -192,6 +193,109 @@ Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
return VecStart;
}
+namespace {
+struct ShapeInfo {
+ unsigned NumRows;
+ unsigned NumColumns;
+
+ bool IsColumnMajor;
+
+ ShapeInfo(unsigned NumRows = 0, unsigned NumColumns = 0)
+ : NumRows(NumRows), NumColumns(NumColumns),
+ IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {}
+
+ ShapeInfo(Value *NumRows, Value *NumColumns)
+ : ShapeInfo(cast<ConstantInt>(NumRows)->getZExtValue(),
+ cast<ConstantInt>(NumColumns)->getZExtValue()) {}
+
+ bool operator==(const ShapeInfo &other) {
+ return NumRows == other.NumRows && NumColumns == other.NumColumns;
+ }
+ bool operator!=(const ShapeInfo &other) { return !(*this == other); }
+
+ /// Returns true if shape-information is defined, meaning both dimensions
+ /// are != 0.
+ operator bool() const {
+ assert(NumRows == 0 || NumColumns != 0);
+ return NumRows != 0;
+ }
+
+ unsigned getStride() const {
+ if (IsColumnMajor)
+ return NumRows;
+ return NumColumns;
+ }
+
+ unsigned getNumVectors() const {
+ if (IsColumnMajor)
+ return NumColumns;
+ return NumRows;
+ }
+
+ /// Returns the transposed shape.
+ ShapeInfo t() const { return ShapeInfo(NumColumns, NumRows); }
+};
+} // namespace
+
+static bool isUniformShape(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul: // Scalar multiply.
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::Sub:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Return the ShapeInfo for the result of \p I, if it can be determined.
+static std::optional<ShapeInfo>
+computeShapeInfoForInst(Instruction *I,
+ const ValueMap<Value *, ShapeInfo> &ShapeMap) {
+ Value *M;
+ Value *N;
+ Value *K;
+ if (match(I, m_Intrinsic<Intrinsic::matrix_multiply>(
+ m_Value(), m_Value(), m_Value(M), m_Value(N), m_Value(K))))
+ return ShapeInfo(M, K);
+ if (match(I, m_Intrinsic<Intrinsic::matrix_transpose>(m_Value(), m_Value(M),
+ m_Value(N)))) {
+ // Flip dimensions.
+ return ShapeInfo(N, M);
+ }
+ if (match(I, m_Intrinsic<Intrinsic::matrix_column_major_store>(
+ m_Value(), m_Value(), m_Value(), m_Value(), m_Value(M),
+ m_Value(N))))
+ return ShapeInfo(N, M);
+ if (match(I, m_Intrinsic<Intrinsic::matrix_column_major_load>(
+ m_Value(), m_Value(), m_Value(), m_Value(M), m_Value(N))))
+ return ShapeInfo(M, N);
+ Value *MatrixA;
+ if (match(I, m_Store(m_Value(MatrixA), m_Value()))) {
+ auto OpShape = ShapeMap.find(MatrixA);
+ if (OpShape != ShapeMap.end())
+ return OpShape->second;
+ }
+
+ if (isUniformShape(I)) {
+ // Find the first operand that has a known shape and use that.
+ for (auto &Op : I->operands()) {
+ auto OpShape = ShapeMap.find(Op.get());
+ if (OpShape != ShapeMap.end())
+ return OpShape->second;
+ }
+ }
+ return std::nullopt;
+}
+
/// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
///
/// Currently, the lowering for each matrix intrinsic is done as follows:
@@ -383,48 +487,6 @@ class LowerMatrixIntrinsics {
}
};
- struct ShapeInfo {
- unsigned NumRows;
- unsigned NumColumns;
-
- bool IsColumnMajor;
-
- ShapeInfo(unsigned NumRows = 0, unsigned NumColumns = 0)
- : NumRows(NumRows), NumColumns(NumColumns),
- IsColumnMajor(MatrixLayout == MatrixLayoutTy::ColumnMajor) {}
-
- ShapeInfo(Value *NumRows, Value *NumColumns)
- : ShapeInfo(cast<ConstantInt>(NumRows)->getZExtValue(),
- cast<ConstantInt>(NumColumns)->getZExtValue()) {}
-
- bool operator==(const ShapeInfo &other) {
- return NumRows == other.NumRows && NumColumns == other.NumColumns;
- }
- bool operator!=(const ShapeInfo &other) { return !(*this == other); }
-
- /// Returns true if shape-information is defined, meaning both dimensions
- /// are != 0.
- operator bool() const {
- assert(NumRows == 0 || NumColumns != 0);
- return NumRows != 0;
- }
-
- unsigned getStride() const {
- if (IsColumnMajor)
- return NumRows;
- return NumColumns;
- }
-
- unsigned getNumVectors() const {
- if (IsColumnMajor)
- return NumColumns;
- return NumRows;
- }
-
- /// Returns the transposed shape.
- ShapeInfo t() const { return ShapeInfo(NumColumns, NumRows); }
- };
-
/// Maps instructions to their shape information. The shape information
/// describes the shape to be used while lowering. This matches the shape of
/// the result value of the instruction, with the only exceptions being store
@@ -459,7 +521,7 @@ public:
LowerMatrixIntrinsics(Function &F, TargetTransformInfo &TTI,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
OptimizationRemarkEmitter *ORE)
- : Func(F), DL(F.getParent()->getDataLayout()), TTI(TTI), AA(AA), DT(DT),
+ : Func(F), DL(F.getDataLayout()), TTI(TTI), AA(AA), DT(DT),
LI(LI), ORE(ORE) {}
unsigned getNumOps(Type *VT) {
@@ -554,25 +616,6 @@ public:
return true;
}
- bool isUniformShape(Value *V) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I)
- return true;
-
- switch (I->getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul: // Scalar multiply.
- case Instruction::FNeg:
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::Sub:
- return true;
- default:
- return false;
- }
- }
-
/// Returns true if shape information can be used for \p V. The supported
/// instructions must match the instructions that can be lowered by this pass.
bool supportsShapeInfo(Value *V) {
@@ -610,43 +653,8 @@ public:
// New entry, set the value and insert operands
bool Propagate = false;
-
- Value *MatrixA;
- Value *MatrixB;
- Value *M;
- Value *N;
- Value *K;
- if (match(Inst, m_Intrinsic<Intrinsic::matrix_multiply>(
- m_Value(MatrixA), m_Value(MatrixB), m_Value(M),
- m_Value(N), m_Value(K)))) {
- Propagate = setShapeInfo(Inst, {M, K});
- } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_transpose>(
- m_Value(MatrixA), m_Value(M), m_Value(N)))) {
- // Flip dimensions.
- Propagate = setShapeInfo(Inst, {N, M});
- } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_column_major_store>(
- m_Value(MatrixA), m_Value(), m_Value(),
- m_Value(), m_Value(M), m_Value(N)))) {
- Propagate = setShapeInfo(Inst, {N, M});
- } else if (match(Inst, m_Intrinsic<Intrinsic::matrix_column_major_load>(
- m_Value(), m_Value(), m_Value(), m_Value(M),
- m_Value(N)))) {
- Propagate = setShapeInfo(Inst, {M, N});
- } else if (match(Inst, m_Store(m_Value(MatrixA), m_Value()))) {
- auto OpShape = ShapeMap.find(MatrixA);
- if (OpShape != ShapeMap.end())
- setShapeInfo(Inst, OpShape->second);
- continue;
- } else if (isUniformShape(Inst)) {
- // Find the first operand that has a known shape and use that.
- for (auto &Op : Inst->operands()) {
- auto OpShape = ShapeMap.find(Op.get());
- if (OpShape != ShapeMap.end()) {
- Propagate |= setShapeInfo(Inst, OpShape->second);
- break;
- }
- }
- }
+ if (auto SI = computeShapeInfoForInst(Inst, ShapeMap))
+ Propagate = setShapeInfo(Inst, *SI);
if (Propagate) {
NewWorkList.push_back(Inst);
@@ -891,20 +899,28 @@ public:
updateShapeAndReplaceAllUsesWith(I, NewInst);
CleanupBinOp(I, A, B);
}
- // A^t + B ^t -> (A + B)^t
+ // A^t + B ^t -> (A + B)^t. Pick rows and columns from first transpose. If
+ // the shape of the second transpose is different, there's a shape conflict
+ // which gets resolved by picking the shape of the first operand.
else if (match(&I, m_FAdd(m_Value(A), m_Value(B))) &&
match(A, m_Intrinsic<Intrinsic::matrix_transpose>(
m_Value(AT), m_ConstantInt(R), m_ConstantInt(C))) &&
match(B, m_Intrinsic<Intrinsic::matrix_transpose>(
- m_Value(BT), m_ConstantInt(R), m_ConstantInt(C)))) {
+ m_Value(BT), m_ConstantInt(), m_ConstantInt()))) {
IRBuilder<> Builder(&I);
- Value *Add = cast<Instruction>(Builder.CreateFAdd(AT, BT, "mfadd"));
- setShapeInfo(Add, {C, R});
+ auto *Add = cast<Instruction>(Builder.CreateFAdd(AT, BT, "mfadd"));
+ setShapeInfo(Add, {R, C});
MatrixBuilder MBuilder(Builder);
Instruction *NewInst = MBuilder.CreateMatrixTranspose(
- Add, C->getZExtValue(), R->getZExtValue(), "mfadd_t");
+ Add, R->getZExtValue(), C->getZExtValue(), "mfadd_t");
updateShapeAndReplaceAllUsesWith(I, NewInst);
+ assert(computeShapeInfoForInst(NewInst, ShapeMap) ==
+ computeShapeInfoForInst(&I, ShapeMap) &&
+ "Shape of new instruction doesn't match original shape.");
CleanupBinOp(I, A, B);
+ assert(computeShapeInfoForInst(Add, ShapeMap).value_or(ShapeMap[Add]) ==
+ ShapeMap[Add] &&
+ "Shape of updated addition doesn't match cached shape.");
}
}
@@ -975,12 +991,15 @@ public:
bool Changed = false;
SmallVector<CallInst *, 16> MaybeFusableInsts;
SmallVector<Instruction *, 16> MatrixInsts;
+ SmallVector<IntrinsicInst *, 16> LifetimeEnds;
// First, collect all instructions with shape information and candidates for
// fusion (currently only matrix multiplies).
ReversePostOrderTraversal<Function *> RPOT(&Func);
for (auto *BB : RPOT)
for (Instruction &I : *BB) {
+ if (match(&I, m_Intrinsic<Intrinsic::lifetime_end>()))
+ LifetimeEnds.push_back(cast<IntrinsicInst>(&I));
if (ShapeMap.find(&I) == ShapeMap.end())
continue;
if (match(&I, m_Intrinsic<Intrinsic::matrix_multiply>()))
@@ -995,7 +1014,7 @@ public:
// Third, try to fuse candidates.
for (CallInst *CI : MaybeFusableInsts)
- LowerMatrixMultiplyFused(CI, FusedInsts);
+ LowerMatrixMultiplyFused(CI, FusedInsts, LifetimeEnds);
Changed = !FusedInsts.empty();
@@ -1332,8 +1351,8 @@ public:
if (!IsIntVec && !FMF.allowReassoc())
return;
- auto CanBeFlattened = [this](Value *Op) {
- if (match(Op, m_BinOp()) && ShapeMap.find(Op) != ShapeMap.end())
+ auto CanBeFlattened = [](Value *Op) {
+ if (match(Op, m_BinOp()))
return true;
return match(
Op, m_OneUse(m_CombineOr(
@@ -1346,6 +1365,9 @@ public:
// the returned cost is < 0, the argument is cheaper to use in the
// dot-product lowering.
auto GetCostForArg = [this, &CanBeFlattened](Value *Op, unsigned N) {
+ if (ShapeMap.find(Op) == ShapeMap.end())
+ return InstructionCost::getInvalid();
+
if (!isa<Instruction>(Op))
return InstructionCost(0);
@@ -1356,7 +1378,7 @@ public:
InstructionCost EmbedCost(0);
// Roughly estimate the cost for embedding the columns into a vector.
for (unsigned I = 1; I < N; ++I)
- EmbedCost -=
+ EmbedCost +=
TTI.getShuffleCost(TTI::SK_Splice, FixedVectorType::get(EltTy, 1),
std::nullopt, TTI::TCK_RecipThroughput);
return EmbedCost;
@@ -1378,7 +1400,7 @@ public:
// vector.
InstructionCost EmbedCost(0);
for (unsigned I = 1; I < N; ++I)
- EmbedCost +=
+ EmbedCost -=
TTI.getShuffleCost(TTI::SK_Splice, FixedVectorType::get(EltTy, 1),
std::nullopt, TTI::TCK_RecipThroughput);
return EmbedCost;
@@ -1391,7 +1413,29 @@ public:
return TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(1), 0) -
N * TTI.getMemoryOpCost(Instruction::Load, EltTy, Align(1), 0);
};
- auto LHSCost = GetCostForArg(LHS, LShape.NumColumns);
+
+ // Iterate over LHS and operations feeding LHS and check if it is profitable
+ // to flatten the visited ops. For each op, we compute the difference
+ // between the flattened and matrix versions.
+ SmallPtrSet<Value *, 4> Seen;
+ SmallVector<Value *> WorkList;
+ SmallVector<Value *> ToFlatten;
+ WorkList.push_back(LHS);
+ InstructionCost LHSCost(0);
+ while (!WorkList.empty()) {
+ Value *Op = WorkList.pop_back_val();
+ if (!Seen.insert(Op).second)
+ continue;
+
+ InstructionCost OpCost = GetCostForArg(Op, LShape.NumColumns);
+ if (OpCost + LHSCost >= LHSCost)
+ continue;
+
+ LHSCost += OpCost;
+ ToFlatten.push_back(Op);
+ if (auto *I = dyn_cast<Instruction>(Op))
+ WorkList.append(I->op_begin(), I->op_end());
+ }
// We compare the costs of a vector.reduce.add to sequential add.
int AddOpCode = IsIntVec ? Instruction::Add : Instruction::FAdd;
@@ -1412,16 +1456,16 @@ public:
FusedInsts.insert(MatMul);
IRBuilder<> Builder(MatMul);
auto FlattenArg = [&Builder, &FusedInsts, &CanBeFlattened,
- this](Value *Op) -> Value * {
+ this](Value *Op) {
// Matmul must be the only user of loads because we don't use LowerLoad
// for row vectors (LowerLoad results in scalar loads and shufflevectors
// instead of single vector load).
if (!CanBeFlattened(Op))
- return Op;
+ return;
if (match(Op, m_BinOp()) && ShapeMap.find(Op) != ShapeMap.end()) {
ShapeMap[Op] = ShapeMap[Op].t();
- return Op;
+ return;
}
FusedInsts.insert(cast<Instruction>(Op));
@@ -1432,16 +1476,19 @@ public:
auto *NewLoad = Builder.CreateLoad(Op->getType(), Arg);
Op->replaceAllUsesWith(NewLoad);
cast<Instruction>(Op)->eraseFromParent();
- return NewLoad;
+ return;
} else if (match(Op, m_Intrinsic<Intrinsic::matrix_transpose>(
m_Value(Arg)))) {
ToRemove.push_back(cast<Instruction>(Op));
- return Arg;
+ Op->replaceAllUsesWith(Arg);
+ return;
}
-
- return Op;
};
- LHS = FlattenArg(LHS);
+
+ for (auto *V : ToFlatten)
+ FlattenArg(V);
+
+ LHS = MatMul->getArgOperand(0);
// Insert mul/fmul and llvm.vector.reduce.fadd
Value *Mul =
@@ -1594,7 +1641,7 @@ public:
IRBuilder<> Builder(MatMul);
Check0->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(Check0);
- Type *IntPtrTy = Builder.getIntPtrTy(Load->getModule()->getDataLayout());
+ Type *IntPtrTy = Builder.getIntPtrTy(Load->getDataLayout());
Value *StoreBegin = Builder.CreatePtrToInt(
const_cast<Value *>(StoreLoc.Ptr), IntPtrTy, "store.begin");
Value *StoreEnd = Builder.CreateAdd(
@@ -1813,8 +1860,10 @@ public:
///
/// Call finalizeLowering on lowered instructions. Instructions that are
/// completely eliminated by fusion are added to \p FusedInsts.
- void LowerMatrixMultiplyFused(CallInst *MatMul,
- SmallPtrSetImpl<Instruction *> &FusedInsts) {
+ void
+ LowerMatrixMultiplyFused(CallInst *MatMul,
+ SmallPtrSetImpl<Instruction *> &FusedInsts,
+ SmallVector<IntrinsicInst *, 16> &LifetimeEnds) {
if (!FuseMatrix || !DT)
return;
@@ -1903,6 +1952,55 @@ public:
for (Instruction *I : ToHoist)
I->moveBefore(MatMul);
+ // Deal with lifetime.end calls that might be between Load0/Load1 and the
+ // store. To avoid introducing loads to dead objects (i.e. after the
+ // lifetime has been terminated by @llvm.lifetime.end), either sink them
+ // after the store if in the same block, or remove the lifetime.end marker
+ // otherwise. This might pessimize further optimizations, by extending the
+ // lifetime of the object until the function returns, but should be
+ // conservatively correct.
+ MemoryLocation Load0Loc = MemoryLocation::get(LoadOp0);
+ MemoryLocation Load1Loc = MemoryLocation::get(LoadOp1);
+ BasicBlock *StoreParent = Store->getParent();
+ bool FusableOpsInSameBlock = LoadOp0->getParent() == StoreParent &&
+ LoadOp1->getParent() == StoreParent;
+ for (unsigned Idx = 0; Idx != LifetimeEnds.size();) {
+ IntrinsicInst *End = LifetimeEnds[Idx];
+ auto Inc = make_scope_exit([&Idx]() { Idx++; });
+ // If the lifetime.end is guaranteed to be before the loads or after the
+ // store, it won't interfere with fusion.
+ if (DT->dominates(End, LoadOp0) && DT->dominates(End, LoadOp1))
+ continue;
+ if (DT->dominates(Store, End))
+ continue;
+ // If all fusable ops are in the same block and the lifetime.end is in a
+ // different block, it won't interfere with fusion.
+ if (FusableOpsInSameBlock && End->getParent() != StoreParent)
+ continue;
+
+ // If the loads don't alias the lifetime.end, it won't interfere with
+ // fusion.
+ MemoryLocation EndLoc = MemoryLocation::getForArgument(End, 1, nullptr);
+ if (!EndLoc.Ptr)
+ continue;
+ if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc))
+ continue;
+
+ // If both lifetime.end and the store are in the same block, extend the
+ // lifetime until after the store, so the new lifetime covers the loads
+ // we introduce later.
+ if (End->getParent() == StoreParent) {
+ End->moveAfter(Store);
+ continue;
+ }
+
+ // Otherwise remove the conflicting lifetime.end marker.
+ ToRemove.push_back(End);
+ std::swap(LifetimeEnds[Idx], LifetimeEnds.back());
+ LifetimeEnds.pop_back();
+ Inc.release();
+ }
+
emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
return;
}
@@ -2364,7 +2462,7 @@ public:
RemarkGenerator(const MapVector<Value *, MatrixTy> &Inst2Matrix,
OptimizationRemarkEmitter &ORE, Function &Func)
: Inst2Matrix(Inst2Matrix), ORE(ORE), Func(Func),
- DL(Func.getParent()->getDataLayout()) {}
+ DL(Func.getDataLayout()) {}
/// Return all leaves of the expressions in \p ExprsInSubprogram. Those are
/// instructions in Inst2Matrix returning void or without any users in
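
One of the larger changes above replaces a single cost query on the dot-product LHS with a worklist walk over the operations feeding it, flattening an op only when its marginal cost is negative. The standalone model below captures that accumulation logic with hypothetical Node/cost types; it is not the pass's InstructionCost machinery.

#include <unordered_set>
#include <vector>

// Hypothetical stand-in for an IR value feeding the dot-product LHS.
struct Node {
  int CostDelta;                  // flattened cost minus matrix-form cost
  std::vector<Node *> Operands;   // producers of this value
};

// Visit LHS and its feeding ops once each; keep an op only if flattening it
// reduces the running total, and then consider its operands as well.
static std::vector<Node *> collectProfitableToFlatten(Node *LHS) {
  std::unordered_set<Node *> Seen;
  std::vector<Node *> WorkList{LHS};
  std::vector<Node *> ToFlatten;
  int LHSCost = 0;
  while (!WorkList.empty()) {
    Node *Op = WorkList.back();
    WorkList.pop_back();
    if (!Seen.insert(Op).second)
      continue;
    if (LHSCost + Op->CostDelta >= LHSCost)   // i.e. CostDelta >= 0: not profitable
      continue;
    LHSCost += Op->CostDelta;
    ToFlatten.push_back(Op);
    for (Node *Producer : Op->Operands)
      WorkList.push_back(Producer);
  }
  return ToFlatten;
}
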
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 78e474f925b5..aea17aa82a88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 805bbe40bd7c..cee34f0a6da1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -99,7 +100,7 @@ struct MemsetRange {
MaybeAlign Alignment;
/// TheStores - The actual stores that make up this range.
- SmallVector<Instruction*, 16> TheStores;
+ SmallVector<Instruction *, 16> TheStores;
bool isProfitableToUseMemset(const DataLayout &DL) const;
};
@@ -108,10 +109,12 @@ struct MemsetRange {
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
- if (TheStores.size() >= 4 || End-Start >= 16) return true;
+ if (TheStores.size() >= 4 || End - Start >= 16)
+ return true;
// If there is nothing to merge, don't do anything.
- if (TheStores.size() < 2) return false;
+ if (TheStores.size() < 2)
+ return false;
// If any of the stores are a memset, then it is always good to extend the
// memset.
@@ -121,7 +124,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
- if (TheStores.size() == 2) return false;
+ if (TheStores.size() == 2)
+ return false;
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
@@ -133,7 +137,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// the maximum GPR width is the same size as the largest legal integer
// size. If so, check to see whether we will end up actually reducing the
// number of stores used.
- unsigned Bytes = unsigned(End-Start);
+ unsigned Bytes = unsigned(End - Start);
unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
if (MaxIntSize == 0)
MaxIntSize = 1;
@@ -145,7 +149,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
// etc.
- return TheStores.size() > NumPointerStores+NumByteStores;
+ return TheStores.size() > NumPointerStores + NumByteStores;
}
namespace {
@@ -197,7 +201,7 @@ public:
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
MaybeAlign Alignment, Instruction *Inst) {
- int64_t End = Start+Size;
+ int64_t End = Start + Size;
range_iterator I = partition_point(
Ranges, [=](const MemsetRange &O) { return O.End < Start; });
@@ -207,10 +211,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
// to insert a new range. Handle this now.
if (I == Ranges.end() || End < I->Start) {
MemsetRange &R = *Ranges.insert(I, MemsetRange());
- R.Start = Start;
- R.End = End;
- R.StartPtr = Ptr;
- R.Alignment = Alignment;
+ R.Start = Start;
+ R.End = End;
+ R.StartPtr = Ptr;
+ R.Alignment = Alignment;
R.TheStores.push_back(Inst);
return;
}
@@ -354,7 +358,7 @@ static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr,
Value *ByteVal) {
- const DataLayout &DL = StartInst->getModule()->getDataLayout();
+ const DataLayout &DL = StartInst->getDataLayout();
// We can't track scalable types
if (auto *SI = dyn_cast<StoreInst>(StartInst))
@@ -397,7 +401,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
- if (!NextStore->isSimple()) break;
+ if (!NextStore->isSimple())
+ break;
Value *StoredVal = NextStore->getValueOperand();
@@ -460,7 +465,8 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// emit memset's for anything big enough to be worthwhile.
Instruction *AMemSet = nullptr;
for (const MemsetRange &Range : Ranges) {
- if (Range.TheStores.size() == 1) continue;
+ if (Range.TheStores.size() == 1)
+ continue;
// If it is profitable to lower this range to memset, do so now.
if (!Range.isProfitableToUseMemset(DL))
@@ -481,12 +487,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- auto *NewDef =
- cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
- ? MSSAU->createMemoryAccessBefore(
- AMemSet, nullptr, MemInsertPoint)
- : MSSAU->createMemoryAccessAfter(
- AMemSet, nullptr, MemInsertPoint));
+ auto *NewDef = cast<MemoryDef>(
+ MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
MSSAU->insertDef(NewDef, /*RenameUses=*/true);
MemInsertPoint = NewDef;
@@ -512,12 +516,13 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// Keep track of the arguments of all instruction we plan to lift
// so we can make sure to lift them as well if appropriate.
- DenseSet<Instruction*> Args;
+ DenseSet<Instruction *> Args;
auto AddArg = [&](Value *Arg) {
auto *I = dyn_cast<Instruction>(Arg);
if (I && I->getParent() == SI->getParent()) {
// Cannot hoist user of P above P
- if (I == P) return false;
+ if (I == P)
+ return false;
Args.insert(I);
}
return true;
@@ -630,8 +635,7 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
const DataLayout &DL,
BasicBlock::iterator &BBI) {
- if (!LI->isSimple() || !LI->hasOneUse() ||
- LI->getParent() != SI->getParent())
+ if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
return false;
auto *T = LI->getType();
@@ -677,22 +681,21 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
UseMemMove = true;
- uint64_t Size = DL.getTypeStoreSize(T);
-
IRBuilder<> Builder(P);
+ Value *Size =
+ Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
Instruction *M;
if (UseMemMove)
- M = Builder.CreateMemMove(
- SI->getPointerOperand(), SI->getAlign(),
- LI->getPointerOperand(), LI->getAlign(), Size);
+ M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
+ LI->getPointerOperand(), LI->getAlign(),
+ Size);
else
- M = Builder.CreateMemCpy(
- SI->getPointerOperand(), SI->getAlign(),
- LI->getPointerOperand(), LI->getAlign(), Size);
+ M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
+ LI->getPointerOperand(), LI->getAlign(), Size);
M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
- LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
- << *M << "\n");
+ LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
+ << "\n");
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
@@ -755,7 +758,8 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
}
bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (!SI->isSimple()) return false;
+ if (!SI->isSimple())
+ return false;
// Avoid merging nontemporal stores since the resulting
// memcpy/memset would not be able to preserve the nontemporal hint.
@@ -766,7 +770,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (SI->getMetadata(LLVMContext::MD_nontemporal))
return false;
- const DataLayout &DL = SI->getModule()->getDataLayout();
+ const DataLayout &DL = SI->getDataLayout();
Value *StoredVal = SI->getValueOperand();
@@ -794,8 +798,8 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// 0xA0A0A0A0 and 0.0.
auto *V = SI->getOperand(0);
if (Value *ByteVal = isBytewiseValue(V, DL)) {
- if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
- ByteVal)) {
+ if (Instruction *I =
+ tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -816,8 +820,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// The newly inserted memset is immediately overwritten by the original
// store, so we do not need to rename uses.
auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- M, nullptr, StoreDef);
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
eraseInstruction(SI);
@@ -836,8 +839,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
// See if there is another memset or store neighboring this memset which
// allows us to widen out the memset to do a single larger store.
if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
- if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
- MSI->getValue())) {
+ if (Instruction *I =
+ tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
@@ -850,7 +853,8 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, TypeSize cpySize,
- Align cpyDestAlign, BatchAAResults &BAA,
+ Align cpyDestAlign,
+ BatchAAResults &BAA,
std::function<CallInst *()> GetC) {
// The general transformation to keep in mind is
//
@@ -879,7 +883,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (!srcArraySize)
return false;
- const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
+ const DataLayout &DL = cpyLoad->getDataLayout();
TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
// We can't optimize scalable types.
if (SrcAllocaSize.isScalable())
@@ -898,15 +902,15 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
return false;
-
if (C->getParent() != cpyStore->getParent()) {
LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
return false;
}
- MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
- MemoryLocation::get(cpyStore) :
- MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
+ MemoryLocation DestLoc =
+ isa<StoreInst>(cpyStore)
+ ? MemoryLocation::get(cpyStore)
+ : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
// Check that nothing touches the dest of the copy between
// the call and the store/memcpy.
@@ -980,10 +984,8 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
append_range(srcUseList, U->users());
continue;
}
- if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
- if (!G->hasAllZeroIndices())
- return false;
-
+ if (const auto *G = dyn_cast<GetElementPtrInst>(U);
+ G && G->hasAllZeroIndices()) {
append_range(srcUseList, U->users());
continue;
}
@@ -991,8 +993,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
if (IT->isLifetimeStartOrEnd())
continue;
- if (U != C && U != cpyLoad)
+ if (U != C && U != cpyLoad) {
+ LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
return false;
+ }
}
// Check whether src is captured by the called function, in which case there
@@ -1121,28 +1125,79 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MemCpyInst *MDep,
BatchAAResults &BAA) {
- // We can only transforms memcpy's where the dest of one is the source of the
- // other.
- if (M->getSource() != MDep->getDest() || MDep->isVolatile())
- return false;
-
// If dep instruction is reading from our current input, then it is a noop
- // transfer and substituting the input won't change this instruction. Just
- // ignore the input and let someone else zap MDep. This handles cases like:
+ // transfer and substituting the input won't change this instruction. Just
+ // ignore the input and let someone else zap MDep. This handles cases like:
// memcpy(a <- a)
// memcpy(b <- a)
if (M->getSource() == MDep->getSource())
return false;
- // Second, the length of the memcpy's must be the same, or the preceding one
+ // We can only optimize non-volatile memcpy's.
+ if (MDep->isVolatile())
+ return false;
+
+ int64_t MForwardOffset = 0;
+ const DataLayout &DL = M->getModule()->getDataLayout();
+ // We can only transform memcpy's where the dest of one is the source of the
+ // other, or where the second's source is at a known offset into the first's dest.
+ if (M->getSource() != MDep->getDest()) {
+ std::optional<int64_t> Offset =
+ M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
+ if (!Offset || *Offset < 0)
+ return false;
+ MForwardOffset = *Offset;
+ }
+
+ // The length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
- if (MDep->getLength() != M->getLength()) {
+ if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
auto *MLen = dyn_cast<ConstantInt>(M->getLength());
- if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ if (!MDepLen || !MLen ||
+ MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
return false;
}
+ IRBuilder<> Builder(M);
+ auto *CopySource = MDep->getSource();
+ Instruction *NewCopySource = nullptr;
+ auto CleanupOnRet = llvm::make_scope_exit([&NewCopySource] {
+ if (NewCopySource && NewCopySource->use_empty())
+ // Safety: It's safe here because we will only allocate more instructions
+ // after finishing all BatchAA queries, but we have to be careful if we
+ // want to do something like this in another place. Then we'd probably
+ // have to delay instruction removal until all transforms on an
+ // instruction finished.
+ NewCopySource->eraseFromParent();
+ });
+ MaybeAlign CopySourceAlign = MDep->getSourceAlign();
+ // We just need to calculate the actual size of the copy.
+ auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
+ MemoryLocation::getForSource(M).Size);
+
+ // When the forwarding offset is greater than 0, we transform
+ // memcpy(d1 <- s1)
+ // memcpy(d2 <- d1+o)
+ // to
+ // memcpy(d2 <- s1+o)
+ if (MForwardOffset > 0) {
+ // The copy destination of `M` maybe can serve as the source of copying.
+ std::optional<int64_t> MDestOffset =
+ M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
+ if (MDestOffset == MForwardOffset)
+ CopySource = M->getDest();
+ else {
+ CopySource = Builder.CreateInBoundsPtrAdd(
+ CopySource, Builder.getInt64(MForwardOffset));
+ NewCopySource = dyn_cast<Instruction>(CopySource);
+ }
+ // We need to update `MCopyLoc` if an offset exists.
+ MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
+ if (CopySourceAlign)
+ CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
+ }
+
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
// memcpy(a <- b)
@@ -1152,12 +1207,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
+ MSSA->getMemoryAccess(M)))
return false;
+ // No need to create `memcpy(a <- a)`.
+ if (BAA.isMustAlias(M->getDest(), CopySource)) {
+ // Remove the instruction we're replacing.
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+
// If the dest of the second might alias the source of the first, then the
// source and dest might overlap. In addition, if the source of the first
// points to constant memory, they won't overlap by definition. Otherwise, we
@@ -1175,27 +1236,27 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// If all checks passed, then we can transform M.
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
- << *MDep << '\n' << *M << '\n');
+ << *MDep << '\n'
+ << *M << '\n');
// TODO: Is this worth it if we're creating a less aligned memcpy? For
// example we could be moving from movaps -> movq on x86.
- IRBuilder<> Builder(M);
Instruction *NewM;
if (UseMemMove)
- NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM =
+ Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
+ CopySourceAlign, M->getLength(), M->isVolatile());
else if (isa<MemCpyInlineInst>(M)) {
// llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
// never allowed since that would allow the latter to be lowered as a call
// to an external function.
- NewM = Builder.CreateMemCpyInline(
- M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
- MDep->getSourceAlign(), M->getLength(), M->isVolatile());
+ NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
+ CopySource, CopySourceAlign,
+ M->getLength(), M->isVolatile());
} else
- NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
- MDep->getRawSource(), MDep->getSourceAlign(),
- M->getLength(), M->isVolatile());
+ NewM =
+ Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
+ CopySourceAlign, M->getLength(), M->isVolatile());
NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
@@ -1235,6 +1296,15 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
return false;
+ // Don't perform the transform if src_size may be zero. In that case, the
+ // transform is essentially a complex no-op and may lead to an infinite
+ // loop if BasicAA is smart enough to understand that dst and dst + src_size
+ // are still MustAlias after the transform.
+ Value *SrcSize = MemCpy->getLength();
+ if (!isKnownNonZero(SrcSize,
+ SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
+ return false;
+
// Check that src and dst of the memcpy aren't the same. While memcpy
// operands cannot partially overlap, exact equality is allowed.
if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
@@ -1251,7 +1321,6 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
Value *DestSize = MemSet->getLength();
- Value *SrcSize = MemCpy->getLength();
if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
return false;
@@ -1307,8 +1376,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// memcpy's defining access is the memset about to be removed.
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, nullptr, LastDef);
+ auto *NewAccess =
+ MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(MemSet);
@@ -1338,7 +1407,7 @@ static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
// The size also doesn't matter, as an out-of-bounds access would be UB.
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
- const DataLayout &DL = Alloca->getModule()->getDataLayout();
+ const DataLayout &DL = Alloca->getDataLayout();
if (std::optional<TypeSize> AllocaSize =
Alloca->getAllocationSize(DL))
if (*AllocaSize == LTSize->getValue())
@@ -1384,7 +1453,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return false;
// A known memcpy size is also required.
- auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
+ auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
if (!CCopySize)
return false;
if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
@@ -1445,7 +1514,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
}
// Check that copy is full with static size.
- const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
+ const DataLayout &DL = DestAlloca->getDataLayout();
std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
if (!SrcSize || Size != *SrcSize) {
LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
@@ -1640,7 +1709,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
static bool isZeroSize(Value *Size) {
if (auto *I = dyn_cast<Instruction>(Size))
- if (auto *Res = simplifyInstruction(I, I->getModule()->getDataLayout()))
+ if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
Size = Res;
// Treat undef/poison size like zero.
if (auto *C = dyn_cast<Constant>(Size))
@@ -1655,7 +1724,8 @@ static bool isZeroSize(Value *Size) {
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// We can only optimize non-volatile memcpy's.
- if (M->isVolatile()) return false;
+ if (M->isVolatile())
+ return false;
// If the source and destination of the memcpy are the same, then zap it.
if (M->getSource() == M->getDest()) {
@@ -1664,8 +1734,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
return true;
}
- // If the size is zero, remove the memcpy. This also prevents infinite loops
- // in processMemSetMemCpyDependence, which is a no-op for zero-length memcpys.
+ // If the size is zero, remove the memcpy.
if (isZeroSize(M->getLength())) {
++BBI;
eraseInstruction(M);
@@ -1681,7 +1750,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
- M->getModule()->getDataLayout())) {
+ M->getDataLayout())) {
IRBuilder<> Builder(M);
Instruction *NewM = Builder.CreateMemSet(
M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
@@ -1796,11 +1865,10 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
<< "\n");
// If not, then we know we can transform this.
- Type *ArgTys[3] = { M->getRawDest()->getType(),
- M->getRawSource()->getType(),
- M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
- Intrinsic::memcpy, ArgTys));
+ Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
+ M->getLength()->getType()};
+ M->setCalledFunction(
+ Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));
// For MemorySSA nothing really changes (except that memcpy may imply stricter
// aliasing guarantees).
@@ -1811,7 +1879,7 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
/// This is called on every byval argument in call sites.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
- const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+ const DataLayout &DL = CB.getDataLayout();
// Find out what feeds this byval argument.
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = CB.getParamByValType(ArgNo);
@@ -1843,7 +1911,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// Get the alignment of the byval. If the call doesn't specify the alignment,
// then it is some target specific value that we can't know.
MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
- if (!ByValAlign) return false;
+ if (!ByValAlign)
+ return false;
// If it is greater than the memcpy, then we check to see if we can force the
// source of the memcpy to the alignment we need. If we fail, we bail out.
@@ -1897,7 +1966,7 @@ bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
return false;
- const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+ const DataLayout &DL = CB.getDataLayout();
Value *ImmutArg = CB.getArgOperand(ArgNo);
// 2. Check that arg is alloca
@@ -1987,7 +2056,7 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
continue;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- // Avoid invalidating the iterator.
+ // Avoid invalidating the iterator.
Instruction *I = &*BI++;
bool RepeatInstruction = false;
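
The memcpy-forwarding change above generalizes the classic dest-equals-source case to a source that sits at a non-negative offset inside the earlier copy's destination. At the source level the rewrite looks like the illustration below; the pass performs it on the IR intrinsics, after checking that the first copy covers offset plus length and that nothing clobbers the source in between.

#include <cstring>

// memcpy(d1 <- s1); memcpy(d2 <- d1 + o)  ==>  memcpy(d2 <- s1 + o)
void beforeForwarding(char *d1, char *d2, const char *s1) {
  std::memcpy(d1, s1, 64);
  std::memcpy(d2, d1 + 16, 32);   // reads from inside the first copy's dest
}

void afterForwarding(char *d1, char *d2, const char *s1) {
  std::memcpy(d1, s1, 64);
  std::memcpy(d2, s1 + 16, 32);   // legal: 16 + 32 <= 64 and s1 is unchanged between the copies
}
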
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index 1e0906717549..4291f3aee0cd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -74,7 +74,7 @@ namespace {
struct BCEAtom {
BCEAtom() = default;
BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
- : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
+ : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(std::move(Offset)) {}
BCEAtom(const BCEAtom &) = delete;
BCEAtom &operator=(const BCEAtom &) = delete;
@@ -151,7 +151,7 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
return {};
}
- const auto &DL = LoadI->getModule()->getDataLayout();
+ const auto &DL = LoadI->getDataLayout();
if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) {
LLVM_DEBUG(dbgs() << "not dereferenceable\n");
// We need to make sure that we can do comparison in any order, so we
@@ -325,7 +325,7 @@ std::optional<BCECmp> visitICmp(const ICmpInst *const CmpI,
auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
if (!Rhs.BaseId)
return std::nullopt;
- const auto &DL = CmpI->getModule()->getDataLayout();
+ const auto &DL = CmpI->getDataLayout();
return BCECmp(std::move(Lhs), std::move(Rhs),
DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()), CmpI);
}
@@ -658,7 +658,7 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
unsigned IntBits = TLI.getIntSize();
// Create memcmp() == 0.
- const auto &DL = Phi.getModule()->getDataLayout();
+ const auto &DL = Phi.getDataLayout();
Value *const MemCmpCall = emitMemCmp(
Lhs, Rhs,
ConstantInt::get(Builder.getIntNTy(SizeTBits), TotalSizeBits / 8),
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index d65054a6ff9d..299239fb7020 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -199,7 +199,7 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
CastInst::isBitOrNoopPointerCastable(
Store0->getValueOperand()->getType(),
Store1->getValueOperand()->getType(),
- Store0->getModule()->getDataLayout()))
+ Store0->getDataLayout()))
return Store1;
}
return nullptr;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 7fe1a222021e..c00c71fcb0b4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -205,7 +205,7 @@ bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
SE = SE_;
TLI = TLI_;
TTI = TTI_;
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getDataLayout();
bool Changed = false, ChangedInThisIteration;
do {
@@ -511,14 +511,15 @@ Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
Instruction *NewI = nullptr;
switch (I->getOpcode()) {
case Instruction::Add:
- NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+ NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I->getIterator());
break;
case Instruction::Mul:
- NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
+ NewI = BinaryOperator::CreateMul(LHS, RHS, "", I->getIterator());
break;
default:
llvm_unreachable("Unexpected instruction.");
}
+ NewI->setDebugLoc(I->getDebugLoc());
NewI->takeName(I);
return NewI;
}
@@ -564,14 +565,24 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
// optimization makes the algorithm O(n).
while (!Candidates.empty()) {
// Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's
- // removed
- // during rewriting.
- if (Value *Candidate = Candidates.back()) {
+ // removed during rewriting.
+ if (Value *Candidate = Candidates.pop_back_val()) {
Instruction *CandidateInstruction = cast<Instruction>(Candidate);
- if (DT->dominates(CandidateInstruction, Dominatee))
- return CandidateInstruction;
+ if (!DT->dominates(CandidateInstruction, Dominatee))
+ continue;
+
+ // Make sure that the instruction is safe to reuse without introducing
+ // poison.
+ SmallVector<Instruction *> DropPoisonGeneratingInsts;
+ if (!SE->canReuseInstruction(CandidateExpr, CandidateInstruction,
+ DropPoisonGeneratingInsts))
+ continue;
+
+ for (Instruction *I : DropPoisonGeneratingInsts)
+ I->dropPoisonGeneratingAnnotations();
+
+ return CandidateInstruction;
}
- Candidates.pop_back();
}
return nullptr;
}
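The NaryReassociate change above folds the pop into the loop and adds a safety check before reusing a dominating candidate, so an instruction is only returned when reusing it cannot introduce poison. A stand-alone sketch of that scan shape, with Dominates and SafeToReuse standing in for the real DominatorTree and ScalarEvolution queries (assumed names, not LLVM APIs):

    #include <optional>
    #include <vector>

    struct Candidate { int Id; bool Dominates; bool SafeToReuse; };

    // Consume candidates from the back (one pop per iteration, mirroring
    // pop_back_val) and return the first one that passes both checks.
    std::optional<Candidate> findReusable(std::vector<Candidate> &Candidates) {
      while (!Candidates.empty()) {
        Candidate C = Candidates.back();
        Candidates.pop_back();
        if (!C.Dominates)
          continue; // must dominate the prospective use site
        if (!C.SafeToReuse)
          continue; // reuse must not (re)introduce poison
        return C;
      }
      return std::nullopt;
    }

Popping unconditionally keeps the loop O(n) regardless of how many candidates are rejected, which matches the intent of the rewritten loop above.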
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 19ac9526b5f8..fc0b31c43396 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -529,7 +529,11 @@ class NewGVN {
// IR.
SmallPtrSet<const Instruction *, 8> PHINodeUses;
- DenseMap<const Value *, bool> OpSafeForPHIOfOps;
+ // The cached results, in general, are only valid for the specific block where
+ // they were computed. The unsigned part of the key is a unique block
+ // identifier.

+ DenseMap<std::pair<const Value *, unsigned>, bool> OpSafeForPHIOfOps;
+ unsigned CacheIdx;
// Map a temporary instruction we created to a parent block.
DenseMap<const Value *, BasicBlock *> TempToBlock;
@@ -892,7 +896,7 @@ private:
// Debug counter info. When verifying, we have to reset the value numbering
// debug counter to the same state it started in to get the same results.
- int64_t StartingVNCounter = 0;
+ DebugCounter::CounterState StartingVNCounter;
};
} // end anonymous namespace
@@ -1199,7 +1203,7 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
} else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) {
Value *V = simplifyGEPInst(GEPI->getSourceElementType(), *E->op_begin(),
ArrayRef(std::next(E->op_begin()), E->op_end()),
- GEPI->isInBounds(), Q);
+ GEPI->getNoWrapFlags(), Q);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (AllConstant) {
@@ -2525,18 +2529,14 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
BasicBlock *TargetBlock = Case.getCaseSuccessor();
updateReachableEdge(B, TargetBlock);
} else {
- for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *TargetBlock = SI->getSuccessor(i);
+ for (BasicBlock *TargetBlock : successors(SI->getParent()))
updateReachableEdge(B, TargetBlock);
- }
}
} else {
// Otherwise this is either unconditional, or a type we have no
// idea about. Just mark successors as reachable.
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *TargetBlock = TI->getSuccessor(i);
+ for (BasicBlock *TargetBlock : successors(TI->getParent()))
updateReachableEdge(B, TargetBlock);
- }
// This also may be a memory defining terminator, in which case, set it
// equivalent only to itself.
@@ -2600,19 +2600,19 @@ bool NewGVN::OpIsSafeForPHIOfOps(Value *V, const BasicBlock *PHIBlock,
if (!isa<Instruction>(I))
continue;
- auto OISIt = OpSafeForPHIOfOps.find(I);
+ auto OISIt = OpSafeForPHIOfOps.find({I, CacheIdx});
if (OISIt != OpSafeForPHIOfOps.end())
return OISIt->second;
// Keep walking until we either dominate the phi block, or hit a phi, or run
// out of things to check.
if (DT->properlyDominates(getBlockForValue(I), PHIBlock)) {
- OpSafeForPHIOfOps.insert({I, true});
+ OpSafeForPHIOfOps.insert({{I, CacheIdx}, true});
continue;
}
// PHI in the same block.
if (isa<PHINode>(I) && getBlockForValue(I) == PHIBlock) {
- OpSafeForPHIOfOps.insert({I, false});
+ OpSafeForPHIOfOps.insert({{I, CacheIdx}, false});
return false;
}
@@ -2631,10 +2631,10 @@ bool NewGVN::OpIsSafeForPHIOfOps(Value *V, const BasicBlock *PHIBlock,
if (!isa<Instruction>(Op))
continue;
// Stop now if we find an unsafe operand.
- auto OISIt = OpSafeForPHIOfOps.find(OrigI);
+ auto OISIt = OpSafeForPHIOfOps.find({OrigI, CacheIdx});
if (OISIt != OpSafeForPHIOfOps.end()) {
if (!OISIt->second) {
- OpSafeForPHIOfOps.insert({I, false});
+ OpSafeForPHIOfOps.insert({{I, CacheIdx}, false});
return false;
}
continue;
@@ -2644,7 +2644,7 @@ bool NewGVN::OpIsSafeForPHIOfOps(Value *V, const BasicBlock *PHIBlock,
Worklist.push_back(cast<Instruction>(Op));
}
}
- OpSafeForPHIOfOps.insert({V, true});
+ OpSafeForPHIOfOps.insert({{V, CacheIdx}, true});
return true;
}
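The hunks above re-key OpSafeForPHIOfOps by (value, CacheIdx) because a cached answer computed while processing one block is not, in general, valid in another. A small stand-alone sketch of that keying scheme (std::map and void* stand in for the pass's DenseMap and Value*):

    #include <map>
    #include <utility>

    // Memoized answers are only trusted for the block they were computed in,
    // so the key carries a per-block index next to the queried value.
    struct PerBlockCache {
      std::map<std::pair<const void *, unsigned>, bool> Cached;
      unsigned CurrentBlock = 0; // analogous to CacheIdx

      const bool *lookup(const void *V) const {
        auto It = Cached.find({V, CurrentBlock});
        return It == Cached.end() ? nullptr : &It->second;
      }
      void remember(const void *V, bool Result) {
        Cached.insert({{V, CurrentBlock}, Result});
      }
    };

Bumping CurrentBlock (as the pass does when it advances CacheIdx per block in RPO order) retires the previous block's entries without having to clear the map.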
@@ -3278,7 +3278,7 @@ void NewGVN::verifyIterationSettled(Function &F) {
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "Beginning iteration verification\n");
if (DebugCounter::isCounterSet(VNCounter))
- DebugCounter::setCounterValue(VNCounter, StartingVNCounter);
+ DebugCounter::setCounterState(VNCounter, StartingVNCounter);
// Note that we have to store the actual classes, as we may change existing
// classes during iteration. This is because our memory iteration propagation
@@ -3297,6 +3297,7 @@ void NewGVN::verifyIterationSettled(Function &F) {
TouchedInstructions.set();
TouchedInstructions.reset(0);
OpSafeForPHIOfOps.clear();
+ CacheIdx = 0;
iterateTouchedInstructions();
DenseSet<std::pair<const CongruenceClass *, const CongruenceClass *>>
EqualClasses;
@@ -3400,6 +3401,8 @@ void NewGVN::iterateTouchedInstructions() {
<< " because it is unreachable\n");
continue;
}
+ // Use the appropriate cache for "OpIsSafeForPHIOfOps".
+ CacheIdx = RPOOrdering.lookup(DT->getNode(CurrBlock)) - 1;
updateProcessedCount(CurrBlock);
}
// Reset after processing (because we may mark ourselves as touched when
@@ -3423,7 +3426,7 @@ void NewGVN::iterateTouchedInstructions() {
// This is the main transformation entry point.
bool NewGVN::runGVN() {
if (DebugCounter::isCounterSet(VNCounter))
- StartingVNCounter = DebugCounter::getCounterValue(VNCounter);
+ StartingVNCounter = DebugCounter::getCounterState(VNCounter);
bool Changed = false;
NumFuncArgs = F.arg_size();
MSSAWalker = MSSA->getWalker();
@@ -3479,6 +3482,8 @@ bool NewGVN::runGVN() {
LLVM_DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
<< " marked reachable\n");
ReachableBlocks.insert(&F.getEntryBlock());
+ // Use index corresponding to entry block.
+ CacheIdx = 0;
iterateTouchedInstructions();
verifyMemoryCongruency();
@@ -3721,7 +3726,7 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
new StoreInst(
PoisonValue::get(Int8Ty),
Constant::getNullValue(PointerType::getUnqual(BB->getContext())),
- BB->getTerminator());
+ BB->getTerminator()->getIterator());
}
void NewGVN::markInstructionForDeletion(Instruction *I) {
@@ -4019,7 +4024,7 @@ bool NewGVN::eliminateInstructions(Function &F) {
// dominated defs as dead.
if (Def) {
// For anything in this case, what and how we value number
- // guarantees that any side-effets that would have occurred (ie
+ // guarantees that any side-effects that would have occurred (ie
// throwing, etc) can be proven to either still occur (because it's
// dominated by something that has the same side-effects), or never
// occur. Otherwise, we would not have been able to prove it value
@@ -4237,7 +4242,7 @@ PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
auto &AA = AM.getResult<AAManager>(F);
auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
bool Changed =
- NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout())
+ NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getDataLayout())
.runGVN();
if (!Changed)
return PreservedAnalyses::all();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 0266eb1a9f50..77d67a2ce0f3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -60,6 +60,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -190,7 +191,7 @@ static bool enableBackedgeSafepoints(Function &F);
static bool enableCallSafepoints(Function &F);
static void
-InsertSafepointPoll(Instruction *InsertBefore,
+InsertSafepointPoll(BasicBlock::iterator InsertBefore,
std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI);
@@ -288,6 +289,8 @@ bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) {
// with for the moment.
legacy::FunctionPassManager FPM(F.getParent());
bool CanAssumeCallSafepoints = enableCallSafepoints(F);
+
+ FPM.add(new TargetLibraryInfoWrapperPass(TLI));
auto *PBS = new PlaceBackedgeSafepointsLegacyPass(CanAssumeCallSafepoints);
FPM.add(PBS);
FPM.run(F);
@@ -308,8 +311,7 @@ bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) {
// We can sometimes end up with duplicate poll locations. This happens if
// a single loop is visited more than once. The fact this happens seems
// wrong, but it does happen for the split-backedge.ll test case.
- PollLocations.erase(std::unique(PollLocations.begin(), PollLocations.end()),
- PollLocations.end());
+ PollLocations.erase(llvm::unique(PollLocations), PollLocations.end());
// Insert a poll at each point the analysis pass identified
// The poll location must be the terminator of a loop latch block.
@@ -368,7 +370,7 @@ bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) {
// safepoint polls themselves.
for (Instruction *PollLocation : PollsNeeded) {
std::vector<CallBase *> RuntimeCalls;
- InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
+ InsertSafepointPoll(PollLocation->getIterator(), RuntimeCalls, TLI);
llvm::append_range(ParsePointNeeded, RuntimeCalls);
}
@@ -517,7 +519,7 @@ static bool doesNotRequireEntrySafepointBefore(CallBase *Call) {
switch (II->getIntrinsicID()) {
case Intrinsic::experimental_gc_statepoint:
case Intrinsic::experimental_patchpoint_void:
- case Intrinsic::experimental_patchpoint_i64:
+ case Intrinsic::experimental_patchpoint:
// These can wrap an actual call which may grow the stack by an unbounded
// amount or run forever.
return false;
@@ -591,7 +593,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
const char GCSafepointPollName[] = "gc.safepoint_poll";
static bool isGCSafepointPoll(Function &F) {
- return F.getName().equals(GCSafepointPollName);
+ return F.getName() == GCSafepointPollName;
}
/// Returns true if this function should be rewritten to include safepoint
@@ -619,7 +621,7 @@ static bool enableCallSafepoints(Function &F) { return !NoCall; }
// not handle the parsability of state at the runtime call, that's the
// caller's job.
static void
-InsertSafepointPoll(Instruction *InsertBefore,
+InsertSafepointPoll(BasicBlock::iterator InsertBefore,
std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI) {
BasicBlock *OrigBB = InsertBefore->getParent();
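This file, like several later ones, moves insertion-point parameters from Instruction * to BasicBlock::iterator. The likely motivation (an assumption here, not stated in the patch) is that an iterator can carry positional information a raw instruction pointer cannot, for example whether the insertion point falls before or after debug records attached to an instruction. A generic sketch of the signature change, using std::list in place of an LLVM basic block:

    #include <list>
    #include <string>

    using InstList = std::list<std::string>;

    // Iterator-based insertion: the position itself is the parameter, so the
    // same API expresses "before the first instruction" as naturally as
    // "before this particular instruction".
    InstList::iterator insertBefore(InstList &BB, InstList::iterator InsertPt,
                                    std::string Inst) {
      return BB.insert(InsertPt, std::move(Inst));
    }

    int main() {
      InstList BB = {"load", "add", "ret"};
      insertBefore(BB, BB.begin(), "alloca");         // at the block start
      insertBefore(BB, std::prev(BB.end()), "store"); // before the terminator
    }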
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 818c7b40d489..e742d2ed12af 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -246,7 +246,8 @@ void ReassociatePass::canonicalizeOperands(Instruction *I) {
}
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
- Instruction *InsertBefore, Value *FlagsOp) {
+ BasicBlock::iterator InsertBefore,
+ Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else {
@@ -258,7 +259,8 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
}
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
- Instruction *InsertBefore, Value *FlagsOp) {
+ BasicBlock::iterator InsertBefore,
+ Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else {
@@ -270,7 +272,8 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
}
static Instruction *CreateNeg(Value *S1, const Twine &Name,
- Instruction *InsertBefore, Value *FlagsOp) {
+ BasicBlock::iterator InsertBefore,
+ Value *FlagsOp) {
if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
@@ -290,7 +293,8 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Constant *NegOne = Ty->isIntOrIntVectorTy() ?
ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
- BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
+ BinaryOperator *Res =
+ CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg->getIterator(), Neg);
Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
@@ -298,98 +302,7 @@ static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
return Res;
}
-/// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael
-/// function. This means that x^(2^k) === 1 mod 2^Bitwidth for
-/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic.
-/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every
-/// even x in Bitwidth-bit arithmetic.
-static unsigned CarmichaelShift(unsigned Bitwidth) {
- if (Bitwidth < 3)
- return Bitwidth - 1;
- return Bitwidth - 2;
-}
-
-/// Add the extra weight 'RHS' to the existing weight 'LHS',
-/// reducing the combined weight using any special properties of the operation.
-/// The existing weight LHS represents the computation X op X op ... op X where
-/// X occurs LHS times. The combined weight represents X op X op ... op X with
-/// X occurring LHS + RHS times. If op is "Xor" for example then the combined
-/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even;
-/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second.
-static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) {
- // If we were working with infinite precision arithmetic then the combined
- // weight would be LHS + RHS. But we are using finite precision arithmetic,
- // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct
- // for nilpotent operations and addition, but not for idempotent operations
- // and multiplication), so it is important to correctly reduce the combined
- // weight back into range if wrapping would be wrong.
-
- // If RHS is zero then the weight didn't change.
- if (RHS.isMinValue())
- return;
- // If LHS is zero then the combined weight is RHS.
- if (LHS.isMinValue()) {
- LHS = RHS;
- return;
- }
- // From this point on we know that neither LHS nor RHS is zero.
-
- if (Instruction::isIdempotent(Opcode)) {
- // Idempotent means X op X === X, so any non-zero weight is equivalent to a
- // weight of 1. Keeping weights at zero or one also means that wrapping is
- // not a problem.
- assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
- return; // Return a weight of 1.
- }
- if (Instruction::isNilpotent(Opcode)) {
- // Nilpotent means X op X === 0, so reduce weights modulo 2.
- assert(LHS == 1 && RHS == 1 && "Weights not reduced!");
- LHS = 0; // 1 + 1 === 0 modulo 2.
- return;
- }
- if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) {
- // TODO: Reduce the weight by exploiting nsw/nuw?
- LHS += RHS;
- return;
- }
-
- assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) &&
- "Unknown associative operation!");
- unsigned Bitwidth = LHS.getBitWidth();
- // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth
- // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth
- // bit number x, since either x is odd in which case x^CM = 1, or x is even in
- // which case both x^W and x^(W - CM) are zero. By subtracting off multiples
- // of CM like this weights can always be reduced to the range [0, CM+Bitwidth)
- // which by a happy accident means that they can always be represented using
- // Bitwidth bits.
- // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than
- // the Carmichael number).
- if (Bitwidth > 3) {
- /// CM - The value of Carmichael's lambda function.
- APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth));
- // Any weight W >= Threshold can be replaced with W - CM.
- APInt Threshold = CM + Bitwidth;
- assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!");
- // For Bitwidth 4 or more the following sum does not overflow.
- LHS += RHS;
- while (LHS.uge(Threshold))
- LHS -= CM;
- } else {
- // To avoid problems with overflow do everything the same as above but using
- // a larger type.
- unsigned CM = 1U << CarmichaelShift(Bitwidth);
- unsigned Threshold = CM + Bitwidth;
- assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold &&
- "Weights not reduced!");
- unsigned Total = LHS.getZExtValue() + RHS.getZExtValue();
- while (Total >= Threshold)
- Total -= CM;
- LHS = Total;
- }
-}
-
-using RepeatedValue = std::pair<Value*, APInt>;
+using RepeatedValue = std::pair<Value *, uint64_t>;
/// Given an associative binary expression, return the leaf
/// nodes in Ops along with their weights (how many times the leaf occurs). The
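The deleted CarmichaelShift/IncorporateWeight machinery relied on the property its comment states: with k = CarmichaelShift(Bitwidth), x^(2^k) is congruent to 1 mod 2^Bitwidth for every odd x, which is why large weights could be folded back into range. A small self-contained check of that property at Bitwidth = 8 (so k = 6), just to make the discarded arithmetic concrete:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned K = 6;                      // CarmichaelShift(8) == 8 - 2
      for (unsigned X = 1; X < 256; X += 2) {    // every odd 8-bit value
        uint8_t P = (uint8_t)X;                  // running product, mod 2^8
        for (unsigned I = 1; I < (1u << K); ++I) // multiply up to X^(2^K)
          P = (uint8_t)(P * X);
        assert(P == 1 && "odd x satisfies x^(2^k) == 1 mod 2^Bitwidth");
      }
      return 0;
    }

With weights now held in a plain uint64_t and simply summed, none of this reduction is needed; the assert added later in this hunk only guards against the sum itself overflowing.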
@@ -467,11 +380,10 @@ using RepeatedValue = std::pair<Value*, APInt>;
static bool LinearizeExprTree(Instruction *I,
SmallVectorImpl<RepeatedValue> &Ops,
ReassociatePass::OrderedSet &ToRedo,
- bool &HasNUW) {
+ reassociate::OverflowTracking &Flags) {
assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
"Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
- unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
assert(I->isAssociative() && I->isCommutative() &&
"Expected an associative and commutative operation!");
@@ -486,8 +398,8 @@ static bool LinearizeExprTree(Instruction *I,
// with their weights, representing a certain number of paths to the operator.
// If an operator occurs in the worklist multiple times then we found multiple
// ways to get to it.
- SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
- Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
+ SmallVector<std::pair<Instruction *, uint64_t>, 8> Worklist; // (Op, Weight)
+ Worklist.push_back(std::make_pair(I, 1));
bool Changed = false;
// Leaves of the expression are values that either aren't the right kind of
@@ -505,23 +417,25 @@ static bool LinearizeExprTree(Instruction *I,
// Leaves - Keeps track of the set of putative leaves as well as the number of
// paths to each leaf seen so far.
- using LeafMap = DenseMap<Value *, APInt>;
+ using LeafMap = DenseMap<Value *, uint64_t>;
LeafMap Leaves; // Leaf -> Total weight so far.
SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
+ const DataLayout DL = I->getDataLayout();
#ifndef NDEBUG
SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
#endif
while (!Worklist.empty()) {
- std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
- I = P.first; // We examine the operands of this binary operator.
+ // We examine the operands of this binary operator.
+ auto [I, Weight] = Worklist.pop_back_val();
- if (isa<OverflowingBinaryOperator>(I))
- HasNUW &= I->hasNoUnsignedWrap();
+ if (isa<OverflowingBinaryOperator>(I)) {
+ Flags.HasNUW &= I->hasNoUnsignedWrap();
+ Flags.HasNSW &= I->hasNoSignedWrap();
+ }
for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
- APInt Weight = P.second; // Number of paths to this operand.
LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
assert(!Op->use_empty() && "No uses, so how did we get to it?!");
@@ -555,26 +469,8 @@ static bool LinearizeExprTree(Instruction *I,
"In leaf map but not visited!");
// Update the number of paths to the leaf.
- IncorporateWeight(It->second, Weight, Opcode);
-
-#if 0 // TODO: Re-enable once PR13021 is fixed.
- // The leaf already has one use from inside the expression. As we want
- // exactly one such use, drop this new use of the leaf.
- assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
- I->setOperand(OpIdx, UndefValue::get(I->getType()));
- Changed = true;
-
- // If the leaf is a binary operation of the right kind and we now see
- // that its multiple original uses were in fact all by nodes belonging
- // to the expression, then no longer consider it to be a leaf and add
- // its operands to the expression.
- if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) {
- LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n");
- Worklist.push_back(std::make_pair(BO, It->second));
- Leaves.erase(It);
- continue;
- }
-#endif
+ It->second += Weight;
+ assert(It->second >= Weight && "Weight overflows");
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
@@ -637,13 +533,22 @@ static bool LinearizeExprTree(Instruction *I,
// Node initially thought to be a leaf wasn't.
continue;
assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
- APInt Weight = It->second;
- if (Weight.isMinValue())
- // Leaf already output or weight reduction eliminated it.
- continue;
+ uint64_t Weight = It->second;
// Ensure the leaf is only output once.
It->second = 0;
Ops.push_back(std::make_pair(V, Weight));
+ if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
+ Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
+ else if (Opcode == Instruction::Mul) {
+ // To preserve NUW we need all inputs non-zero.
+ // To preserve NSW we need all inputs strictly positive.
+ if (Flags.AllKnownNonZero &&
+ (Flags.HasNUW || (Flags.HasNSW && Flags.AllKnownNonNegative))) {
+ Flags.AllKnownNonZero &= isKnownNonZero(V, SimplifyQuery(DL));
+ if (Flags.HasNSW && Flags.AllKnownNonNegative)
+ Flags.AllKnownNonNegative &= isKnownNonNegative(V, SimplifyQuery(DL));
+ }
+ }
}
// For nilpotent operations or addition there may be no operands, for example
@@ -652,7 +557,7 @@ static bool LinearizeExprTree(Instruction *I,
if (Ops.empty()) {
Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType());
assert(Identity && "Associative operation without identity!");
- Ops.emplace_back(Identity, APInt(Bitwidth, 1));
+ Ops.emplace_back(Identity, 1);
}
return Changed;
@@ -662,7 +567,7 @@ static bool LinearizeExprTree(Instruction *I,
/// linearized and optimized, emit them in-order.
void ReassociatePass::RewriteExprTree(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops,
- bool HasNUW) {
+ OverflowTracking Flags) {
assert(Ops.size() > 1 && "Single values should be used directly!");
// Since our optimizations should never increase the number of operations, the
@@ -691,8 +596,8 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
/// of leaf nodes as inner nodes cannot occur by remembering all of the future
/// leaves and refusing to reuse any of them as inner nodes.
SmallPtrSet<Value*, 8> NotRewritable;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- NotRewritable.insert(Ops[i].Op);
+ for (const ValueEntry &Op : Ops)
+ NotRewritable.insert(Op.Op);
// ExpressionChangedStart - Non-null if the rewritten expression differs from
// the original in some non-trivial way, requiring the clearing of optional
@@ -792,9 +697,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
// stupid, create a new node if there are none left.
BinaryOperator *NewOp;
if (NodesToRewrite.empty()) {
- Constant *Undef = UndefValue::get(I->getType());
- NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
- Undef, Undef, "", I);
+ Constant *Poison = PoisonValue::get(I->getType());
+ NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Poison,
+ Poison, "", I->getIterator());
if (isa<FPMathOperator>(NewOp))
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
@@ -827,11 +732,14 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
ExpressionChangedStart->setFastMathFlags(Flags);
} else {
ExpressionChangedStart->clearSubclassOptionalData();
- // Note that it doesn't hold for mul if one of the operands is zero.
- // TODO: We can preserve NUW flag if we prove that all mul operands
- // are non-zero.
- if (HasNUW && ExpressionChangedStart->getOpcode() == Instruction::Add)
- ExpressionChangedStart->setHasNoUnsignedWrap();
+ if (ExpressionChangedStart->getOpcode() == Instruction::Add ||
+ (ExpressionChangedStart->getOpcode() == Instruction::Mul &&
+ Flags.AllKnownNonZero)) {
+ if (Flags.HasNUW)
+ ExpressionChangedStart->setHasNoUnsignedWrap();
+ if (Flags.HasNSW && (Flags.AllKnownNonNegative || Flags.HasNUW))
+ ExpressionChangedStart->setHasNoSignedWrap();
+ }
}
}
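The hunk above replaces the old rule of keeping NUW only for add with conditions tracked in OverflowTracking: for mul, NUW survives only if every input is known non-zero, and NSW additionally requires all inputs non-negative (or NUW as well). A concrete 8-bit illustration (not taken from the patch) of why a zero operand is the problem case for an unsigned no-wrap mul:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Unsigned 8-bit arithmetic: a zero operand can hide an overflow that a
      // different association of the same product would expose.
      uint8_t A = 32, B = 16, C = 0;
      unsigned Orig1 = unsigned(A) * C;   // 0, fits in 8 bits
      unsigned Orig2 = Orig1 * B;         // 0, fits in 8 bits
      unsigned Re1 = unsigned(A) * B;     // 512, would wrap an 8-bit nuw mul
      unsigned Re2 = (Re1 & 0xFF) * C;    // 0 again, but only after wrapping
      std::printf("(A*C)*B steps: %u %u   (A*B)*C steps: %u %u\n",
                  Orig1, Orig2, Re1, Re2);
      return 0;
    }

If every operand is known non-zero and no original step wrapped, any regrouping's intermediate product is bounded by the full product, so NUW can safely be kept on the rewritten nodes.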
@@ -854,8 +762,8 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
}
// Throw away any left over nodes from the original expression.
- for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
- RedoInsts.insert(NodesToRewrite[i]);
+ for (BinaryOperator *BO : NodesToRewrite)
+ RedoInsts.insert(BO);
}
/// Insert instructions before the instruction pointed to by BI,
@@ -868,7 +776,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
static Value *NegateValue(Value *V, Instruction *BI,
ReassociatePass::OrderedSet &ToRedo) {
if (auto *C = dyn_cast<Constant>(V)) {
- const DataLayout &DL = BI->getModule()->getDataLayout();
+ const DataLayout &DL = BI->getDataLayout();
Constant *Res = C->getType()->isFPOrFPVectorTy()
? ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)
: ConstantExpr::getNeg(C);
@@ -945,7 +853,13 @@ static Value *NegateValue(Value *V, Instruction *BI,
->getIterator();
}
+ // If TheNeg is moved out of its parent block, drop its debug location so
+ // that it does not contribute spurious line coverage at the new position.
+ // See test dropping_debugloc_the_neg.ll for a detailed example.
+ if (TheNeg->getParent() != InsertPt->getParent())
+ TheNeg->dropLocation();
TheNeg->moveBefore(*InsertPt->getParent(), InsertPt);
+
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
TheNeg->setHasNoSignedWrap(false);
@@ -958,7 +872,8 @@ static Value *NegateValue(Value *V, Instruction *BI,
// Insert a 'neg' instruction that subtracts the value from zero to get the
// negation.
- Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+ Instruction *NewNeg =
+ CreateNeg(V, V->getName() + ".neg", BI->getIterator(), BI);
ToRedo.insert(NewNeg);
return NewNeg;
}
@@ -1044,8 +959,8 @@ static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
/// transform this into (X+Y) to allow arithmetic reassociation.
static BinaryOperator *convertOrWithNoCommonBitsToAdd(Instruction *Or) {
// Convert an or into an add.
- BinaryOperator *New =
- CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or);
+ BinaryOperator *New = CreateAdd(Or->getOperand(0), Or->getOperand(1), "",
+ Or->getIterator(), Or);
New->setHasNoSignedWrap();
New->setHasNoUnsignedWrap();
New->takeName(Or);
@@ -1097,7 +1012,8 @@ static BinaryOperator *BreakUpSubtract(Instruction *Sub,
// Calculate the negative value of Operand 1 of the sub instruction,
// and set it as the RHS of the add instruction we just made.
Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
- BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
+ BinaryOperator *New =
+ CreateAdd(Sub->getOperand(0), NegVal, "", Sub->getIterator(), Sub);
Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
New->takeName(Sub);
@@ -1115,10 +1031,11 @@ static BinaryOperator *BreakUpSubtract(Instruction *Sub,
static BinaryOperator *ConvertShiftToMul(Instruction *Shl) {
Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
auto *SA = cast<ConstantInt>(Shl->getOperand(1));
- MulCst = ConstantExpr::getShl(MulCst, SA);
+ MulCst = ConstantFoldBinaryInstruction(Instruction::Shl, MulCst, SA);
+ assert(MulCst && "Constant folding of immediate constants failed");
- BinaryOperator *Mul =
- BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
+ BinaryOperator *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
+ "", Shl->getIterator());
Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op.
Mul->takeName(Shl);
@@ -1168,13 +1085,13 @@ static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
/// Emit a tree of add instructions, summing Ops together
/// and returning the result. Insert the tree before I.
-static Value *EmitAddTreeOfValues(Instruction *I,
+static Value *EmitAddTreeOfValues(BasicBlock::iterator It,
SmallVectorImpl<WeakTrackingVH> &Ops) {
if (Ops.size() == 1) return Ops.back();
Value *V1 = Ops.pop_back_val();
- Value *V2 = EmitAddTreeOfValues(I, Ops);
- return CreateAdd(V2, V1, "reass.add", I, I);
+ Value *V2 = EmitAddTreeOfValues(It, Ops);
+ return CreateAdd(V2, V1, "reass.add", It, &*It);
}
/// If V is an expression tree that is a multiplication sequence,
@@ -1186,14 +1103,13 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
return nullptr;
SmallVector<RepeatedValue, 8> Tree;
- bool HasNUW = true;
- MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, HasNUW);
+ OverflowTracking Flags;
+ MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts, Flags);
SmallVector<ValueEntry, 8> Factors;
Factors.reserve(Tree.size());
for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
RepeatedValue E = Tree[i];
- Factors.append(E.second.getZExtValue(),
- ValueEntry(getRank(E.first), E.first));
+ Factors.append(E.second, ValueEntry(getRank(E.first), E.first));
}
bool FoundFactor = false;
@@ -1229,7 +1145,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
- RewriteExprTree(BO, Factors, HasNUW);
+ RewriteExprTree(BO, Factors, Flags);
return nullptr;
}
@@ -1241,12 +1157,12 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
RedoInsts.insert(BO);
V = Factors[0].Op;
} else {
- RewriteExprTree(BO, Factors, HasNUW);
+ RewriteExprTree(BO, Factors, Flags);
V = BO;
}
if (NeedsNegate)
- V = CreateNeg(V, "neg", &*InsertPt, BO);
+ V = CreateNeg(V, "neg", InsertPt, BO);
return V;
}
@@ -1321,7 +1237,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
/// instruction. There are two special cases: 1) if the constant operand is 0,
/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
/// be returned.
-static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
+static Value *createAndInstr(BasicBlock::iterator InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
if (ConstOpnd.isZero())
return nullptr;
@@ -1342,7 +1258,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
// If it was successful, true is returned, and the "R" and "C" is returned
// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
// and both "Res" and "ConstOpnd" remain unchanged.
-bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
APInt &ConstOpnd, Value *&Res) {
// Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
@@ -1359,7 +1275,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
return false;
Value *X = Opnd1->getSymbolicPart();
- Res = createAndInstr(I, X, ~C1);
+ Res = createAndInstr(It, X, ~C1);
// ConstOpnd was C2, now C1 ^ C2.
ConstOpnd ^= C1;
@@ -1376,7 +1292,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// via "Res" and "ConstOpnd", respectively (If the entire expression is
// evaluated to a constant, the Res is set to NULL); otherwise, false is
// returned, and both "Res" and "ConstOpnd" remain unchanged.
-bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
+bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
XorOpnd *Opnd2, APInt &ConstOpnd,
Value *&Res) {
Value *X = Opnd1->getSymbolicPart();
@@ -1411,7 +1327,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
return false;
}
- Res = createAndInstr(I, X, C3);
+ Res = createAndInstr(It, X, C3);
ConstOpnd ^= C1;
} else if (Opnd1->isOrExpr()) {
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
@@ -1427,7 +1343,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
return false;
}
- Res = createAndInstr(I, X, C3);
+ Res = createAndInstr(It, X, C3);
ConstOpnd ^= C3;
} else {
// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
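The Xor rules quoted in these comments are plain bit identities, so they can be checked exhaustively at a small width. A stand-alone verification of Xor-Rule 3 and Xor-Rule 4 over all 8-bit combinations (illustration only, independent of the pass):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned C1 = 0; C1 < 256; ++C1)
          for (unsigned C2 = 0; C2 < 256; ++C2) {
            uint8_t x = X, c1 = C1, c2 = C2, c3 = c1 ^ c2;
            // Xor-Rule 3: (x | c1) ^ (x | c2) == (x & c3) ^ c3
            assert(uint8_t((x | c1) ^ (x | c2)) == uint8_t((x & c3) ^ c3));
            // Xor-Rule 4: (x & c1) ^ (x & c2) == x & (c1 ^ c2)
            assert(uint8_t((x & c1) ^ (x & c2)) == uint8_t(x & c3));
          }
      return 0;
    }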
@@ -1435,7 +1351,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
const APInt &C1 = Opnd1->getConstPart();
const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
- Res = createAndInstr(I, X, C3);
+ Res = createAndInstr(It, X, C3);
}
// Put the original operands in the Redo list; hope they will be deleted
@@ -1483,8 +1399,8 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
// the "OpndPtrs" as well. For the similar reason, do not fuse this loop
// with the previous loop --- the iterator of the "Opnds" may be invalidated
// when new elements are added to the vector.
- for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
- OpndPtrs.push_back(&Opnds[i]);
+ for (XorOpnd &Op : Opnds)
+ OpndPtrs.push_back(&Op);
// Step 2: Sort the Xor-Operands in a way such that the operands containing
// the same symbolic value cluster together. For instance, the input operand
@@ -1512,7 +1428,8 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
- if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ if (!ConstOpnd.isZero() &&
+ CombineXorOpnd(I->getIterator(), CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
@@ -1529,7 +1446,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
// step 3.2: When previous and current operands share the same symbolic
// value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
- if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
+ if (CombineXorOpnd(I->getIterator(), CurrOpnd, PrevOpnd, ConstOpnd, CV)) {
// Remove previous operand
PrevOpnd->Invalidate();
if (CV) {
@@ -1600,7 +1517,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
Type *Ty = TheOp->getType();
Constant *C = Ty->isIntOrIntVectorTy() ?
ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
- Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
+ Instruction *Mul = CreateMul(TheOp, C, "factor", I->getIterator(), I);
// Now that we have inserted a multiply, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
@@ -1764,7 +1681,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
DummyInst->deleteValue();
unsigned NumAddedValues = NewMulOps.size();
- Value *V = EmitAddTreeOfValues(I, NewMulOps);
+ Value *V = EmitAddTreeOfValues(I->getIterator(), NewMulOps);
// Now that we have inserted the add tree, optimize it. This allows us to
// handle cases that require multiple factoring steps, such as this:
@@ -1775,7 +1692,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
RedoInsts.insert(VI);
// Create the multiply.
- Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I);
+ Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I->getIterator(), I);
// Rerun associate on the multiply in case the inner expression turned into
// a multiply. We want to make sure that we keep things in canonical form.
@@ -1914,10 +1831,10 @@ ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
}
// Unique factors with equal powers -- we've folded them into the first one's
// base.
- Factors.erase(std::unique(Factors.begin(), Factors.end(),
- [](const Factor &LHS, const Factor &RHS) {
- return LHS.Power == RHS.Power;
- }),
+ Factors.erase(llvm::unique(Factors,
+ [](const Factor &LHS, const Factor &RHS) {
+ return LHS.Power == RHS.Power;
+ }),
Factors.end());
// Iteratively collect the base of each factor with an add power into the
@@ -1974,7 +1891,7 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- const DataLayout &DL = I->getModule()->getDataLayout();
+ const DataLayout &DL = I->getDataLayout();
Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty()) {
@@ -2071,8 +1988,8 @@ void ReassociatePass::EraseInst(Instruction *I) {
I->eraseFromParent();
// Optimize its operands.
SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) {
+ for (Value *V : Ops)
+ if (Instruction *Op = dyn_cast<Instruction>(V)) {
// If this is a node in an expression tree, climb to the expression root
// and add that since that's where optimization actually happens.
unsigned Opcode = Op->getOpcode();
@@ -2270,7 +2187,7 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
shouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) &&
(cast<PossiblyDisjointInst>(I)->isDisjoint() ||
haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1),
- SimplifyQuery(I->getModule()->getDataLayout(),
+ SimplifyQuery(I->getDataLayout(),
/*DT=*/nullptr, /*AC=*/nullptr, I)))) {
Instruction *NI = convertOrWithNoCommonBitsToAdd(I);
RedoInsts.insert(I);
@@ -2366,12 +2283,12 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
- bool HasNUW = true;
- MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, HasNUW);
+ OverflowTracking Flags;
+ MadeChange |= LinearizeExprTree(I, Tree, RedoInsts, Flags);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
for (const RepeatedValue &E : Tree)
- Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
+ Ops.append(E.second, ValueEntry(getRank(E.first), E.first));
LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
@@ -2560,7 +2477,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
dbgs() << '\n');
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
- RewriteExprTree(I, Ops, HasNUW);
+ RewriteExprTree(I, Ops, Flags);
}
void
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 6c2b3e9bd4a7..ebc5075aa36f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -64,7 +64,7 @@ static bool runPass(Function &F) {
CastInst *AllocaInsertionPoint = new BitCastInst(
Constant::getNullValue(Type::getInt32Ty(F.getContext())),
- Type::getInt32Ty(F.getContext()), "reg2mem alloca point", &*I);
+ Type::getInt32Ty(F.getContext()), "reg2mem alloca point", I);
// Find the escaped instructions. But don't create stack slots for
// allocas in entry block.
@@ -76,7 +76,7 @@ static bool runPass(Function &F) {
// Demote escaped instructions
NumRegsDemoted += WorkList.size();
for (Instruction *I : WorkList)
- DemoteRegToStack(*I, false, AllocaInsertionPoint);
+ DemoteRegToStack(*I, false, AllocaInsertionPoint->getIterator());
WorkList.clear();
@@ -88,7 +88,7 @@ static bool runPass(Function &F) {
// Demote phi nodes
NumPhisDemoted += WorkList.size();
for (Instruction *I : WorkList)
- DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint);
+ DemotePHIToStack(cast<PHINode>(I), AllocaInsertionPoint->getIterator());
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 45ce3bf3ceae..2b99e28acb4e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1143,7 +1143,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
assert(Base && "Can't be null");
// The cast is needed since base traversal may strip away bitcasts
if (Base->getType() != Input->getType() && InsertPt)
- Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
+ Base = new BitCastInst(Base, Input->getType(), "cast",
+ InsertPt->getIterator());
return Base;
};
@@ -1251,7 +1252,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
// get the data layout to compare the sizes of base/derived pointer values
[[maybe_unused]] auto &DL =
- cast<llvm::Instruction>(Def)->getModule()->getDataLayout();
+ cast<llvm::Instruction>(Def)->getDataLayout();
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
@@ -1322,7 +1323,7 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
IsKnownBaseMapTy &KnownBases) {
StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
// We assume that all pointers passed to deopt are base pointers; as an
- // optimization, we can use this to avoid seperately materializing the base
+ // optimization, we can use this to avoid separately materializing the base
// pointer graph. This is only relevant since we're very conservative about
// generating new conflict nodes during base pointer insertion. If we were
// smarter there, this would be irrelevant.
@@ -1612,7 +1613,7 @@ public:
// Note: we've inserted instructions, so the call to llvm.deoptimize may
// not necessarily be followed by the matching return.
auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
- new UnreachableInst(RI->getContext(), RI);
+ new UnreachableInst(RI->getContext(), RI->getIterator());
RI->eraseFromParent();
}
@@ -1684,10 +1685,10 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// Pass through the requested lowering if any. The default is live-through.
StringRef DeoptLowering = getDeoptLowering(Call);
- if (DeoptLowering.equals("live-in"))
+ if (DeoptLowering == "live-in")
Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
else {
- assert(DeoptLowering.equals("live-through") && "Unsupported value!");
+ assert(DeoptLowering == "live-through" && "Unsupported value!");
}
FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
@@ -1733,7 +1734,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
// memcpy(dest_derived, source_derived, ...) =>
// memcpy(dest_base, dest_offset, source_base, source_offset, ...)
auto &Context = Call->getContext();
- auto &DL = Call->getModule()->getDataLayout();
+ auto &DL = Call->getDataLayout();
auto GetBaseAndOffset = [&](Value *Derived) {
Value *Base = nullptr;
// Optimizations in unreachable code might substitute the real pointer
@@ -1976,7 +1977,7 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
// Emit store into the related alloca.
assert(Relocate->getNextNode() &&
"Should always have one since it's not a terminator");
- new StoreInst(Relocate, Alloca, Relocate->getNextNode());
+ new StoreInst(Relocate, Alloca, std::next(Relocate->getIterator()));
#ifndef NDEBUG
VisitedLiveValues.insert(OriginalValue);
@@ -1999,7 +2000,7 @@ static void insertRematerializationStores(
Value *Alloca = AllocaMap[OriginalValue];
new StoreInst(RematerializedValue, Alloca,
- RematerializedValue->getNextNode());
+ std::next(RematerializedValue->getIterator()));
#ifndef NDEBUG
VisitedLiveValues.insert(OriginalValue);
@@ -2029,11 +2030,11 @@ static void relocationViaAlloca(
// Emit alloca for "LiveValue" and record it in "allocaMap" and
// "PromotableAllocas"
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
auto emitAllocaFor = [&](Value *LiveValue) {
- AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
- DL.getAllocaAddrSpace(), "",
- F.getEntryBlock().getFirstNonPHI());
+ AllocaInst *Alloca =
+ new AllocaInst(LiveValue->getType(), DL.getAllocaAddrSpace(), "",
+ F.getEntryBlock().getFirstNonPHIIt());
AllocaMap[LiveValue] = Alloca;
PromotableAllocas.push_back(Alloca);
};
@@ -2100,7 +2101,7 @@ static void relocationViaAlloca(
ToClobber.push_back(Alloca);
}
- auto InsertClobbersAt = [&](Instruction *IP) {
+ auto InsertClobbersAt = [&](BasicBlock::iterator IP) {
for (auto *AI : ToClobber) {
auto AT = AI->getAllocatedType();
Constant *CPN;
@@ -2115,10 +2116,11 @@ static void relocationViaAlloca(
// Insert the clobbering stores. These may get intermixed with the
// gc.results and gc.relocates, but that's fine.
if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
- InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
- InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
+ InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
+ InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
} else {
- InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
+ InsertClobbersAt(
+ std::next(cast<Instruction>(Statepoint)->getIterator()));
}
}
}
@@ -2146,7 +2148,7 @@ static void relocationViaAlloca(
}
llvm::sort(Uses);
- auto Last = std::unique(Uses.begin(), Uses.end());
+ auto Last = llvm::unique(Uses);
Uses.erase(Last, Uses.end());
for (Instruction *Use : Uses) {
@@ -2154,15 +2156,15 @@ static void relocationViaAlloca(
PHINode *Phi = cast<PHINode>(Use);
for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
if (Def == Phi->getIncomingValue(i)) {
- LoadInst *Load =
- new LoadInst(Alloca->getAllocatedType(), Alloca, "",
- Phi->getIncomingBlock(i)->getTerminator());
+ LoadInst *Load = new LoadInst(
+ Alloca->getAllocatedType(), Alloca, "",
+ Phi->getIncomingBlock(i)->getTerminator()->getIterator());
Phi->setIncomingValue(i, Load);
}
}
} else {
- LoadInst *Load =
- new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
+ LoadInst *Load = new LoadInst(Alloca->getAllocatedType(), Alloca, "",
+ Use->getIterator());
Use->replaceUsesOfWith(Def, Load);
}
}
@@ -2229,16 +2231,16 @@ static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
if (isa<CallInst>(Call)) {
// For call safepoints insert dummy calls right after safepoint
Holders.push_back(
- CallInst::Create(Func, Values, "", &*++Call->getIterator()));
+ CallInst::Create(Func, Values, "", std::next(Call->getIterator())));
return;
}
// For invoke safepoints insert dummy calls both in normal and
// exceptional destination blocks
auto *II = cast<InvokeInst>(Call);
Holders.push_back(CallInst::Create(
- Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
+ Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
Holders.push_back(CallInst::Create(
- Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
+ Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
}
static void findLiveReferences(
@@ -2269,7 +2271,7 @@ static Value* findRematerializableChainToBasePointer(
}
if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
- if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
+ if (!CI->isNoopCast(CI->getDataLayout()))
return CI;
ChainToBase.push_back(CI);
@@ -2291,7 +2293,7 @@ chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
for (Instruction *Instr : Chain) {
if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
- assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
+ assert(CI->isNoopCast(CI->getDataLayout()) &&
"non noop cast is found during rematerialization");
Type *SrcTy = CI->getOperand(0)->getType();
@@ -2599,7 +2601,7 @@ static bool inlineGetBaseAndOffset(Function &F,
DefiningValueMapTy &DVCache,
IsKnownBaseMapTy &KnownBases) {
auto &Context = F.getContext();
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
bool Changed = false;
for (auto *Callsite : Intrinsics)
@@ -3044,8 +3046,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// which doesn't know how to produce a proper deopt state. So if we see a
// non-leaf memcpy/memmove without deopt state just treat it as a leaf
// copy and don't produce a statepoint.
- if (!AllowStatepointWithNoDeoptInfo &&
- !Call->getOperandBundle(LLVMContext::OB_deopt)) {
+ if (!AllowStatepointWithNoDeoptInfo && !Call->hasDeoptState()) {
assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
"Don't expect any other calls here!");
return false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
index 8a491e74b91c..ce45c58e624e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -119,7 +119,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
}
PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) {
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
index 17a94f9381bf..c738a2a6f39a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -116,10 +116,6 @@ STATISTIC(
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
-/// Hidden option to experiment with completely strict handling of inbounds
-/// GEPs.
-static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
- cl::Hidden);
/// Disable running mem2reg during SROA in order to test or debug SROA.
static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
cl::Hidden);
@@ -293,7 +289,7 @@ calculateFragment(DILocalVariable *Variable,
if (!CurrentFragment) {
if (auto Size = Variable->getSizeInBits()) {
// Treat the current fragment as covering the whole variable.
- CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
+ CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
if (Target == CurrentFragment)
return UseNoFrag;
}
@@ -319,28 +315,21 @@ static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) {
return DebugVariable(DVI->getVariable(), std::nullopt,
DVI->getDebugLoc().getInlinedAt());
}
-static DebugVariable getAggregateVariable(DPValue *DPV) {
- return DebugVariable(DPV->getVariable(), std::nullopt,
- DPV->getDebugLoc().getInlinedAt());
+static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
+ return DebugVariable(DVR->getVariable(), std::nullopt,
+ DVR->getDebugLoc().getInlinedAt());
}
-static DPValue *createLinkedAssign(DPValue *, DIBuilder &DIB,
- Instruction *LinkedInstr, Value *NewValue,
- DILocalVariable *Variable,
- DIExpression *Expression, Value *Address,
- DIExpression *AddressExpression,
- const DILocation *DI) {
- (void)DIB;
- return DPValue::createLinkedDPVAssign(LinkedInstr, NewValue, Variable,
- Expression, Address, AddressExpression,
- DI);
+/// Helpers for handling new and old debug info modes in migrateDebugInfo.
+/// These overloads unwrap a DbgInstPtr {Instruction* | DbgRecord*} union based
+/// on the \p Unused parameter type.
+DbgVariableRecord *UnwrapDbgInstPtr(DbgInstPtr P, DbgVariableRecord *Unused) {
+ (void)Unused;
+ return static_cast<DbgVariableRecord *>(cast<DbgRecord *>(P));
}
-static DbgAssignIntrinsic *createLinkedAssign(
- DbgAssignIntrinsic *, DIBuilder &DIB, Instruction *LinkedInstr,
- Value *NewValue, DILocalVariable *Variable, DIExpression *Expression,
- Value *Address, DIExpression *AddressExpression, const DILocation *DI) {
- return DIB.insertDbgAssign(LinkedInstr, NewValue, Variable, Expression,
- Address, AddressExpression, DI);
+DbgAssignIntrinsic *UnwrapDbgInstPtr(DbgInstPtr P, DbgAssignIntrinsic *Unused) {
+ (void)Unused;
+ return static_cast<DbgAssignIntrinsic *>(cast<Instruction *>(P));
}
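migrateDebugInfo now calls a single DIBuilder API and uses the UnwrapDbgInstPtr overloads above to pick the pointer-union member matching whichever marker type the generic lambda is instantiated with. A generic sketch of the same overload-on-a-tag-parameter dispatch, with std::variant standing in for the LLVM pointer union (an analogy, not the actual cast-based implementation):

    #include <cassert>
    #include <variant>

    struct OldMarker { int Id; };
    struct NewMarker { int Id; };
    using EitherPtr = std::variant<OldMarker *, NewMarker *>;

    // The second parameter is unused at runtime; its static type selects the
    // overload, which then extracts the matching alternative.
    OldMarker *unwrap(EitherPtr P, OldMarker * /*Unused*/) {
      return std::get<OldMarker *>(P);
    }
    NewMarker *unwrap(EitherPtr P, NewMarker * /*Unused*/) {
      return std::get<NewMarker *>(P);
    }

    int main() {
      NewMarker N{42};
      EitherPtr P = &N;
      NewMarker *Tag = nullptr;          // an existing pointer of the wanted type
      assert(unwrap(P, Tag)->Id == 42);  // overload chosen by Tag's static type
      return 0;
    }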
/// Find linked dbg.assign and generate a new one with the correct
@@ -363,9 +352,9 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
Instruction *Inst, Value *Dest, Value *Value,
const DataLayout &DL) {
auto MarkerRange = at::getAssignmentMarkers(OldInst);
- auto DPVAssignMarkerRange = at::getDPVAssignmentMarkers(OldInst);
+ auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
// Nothing to do if OldInst has no linked dbg.assign intrinsics.
- if (MarkerRange.empty() && DPVAssignMarkerRange.empty())
+ if (MarkerRange.empty() && DVRAssignMarkerRange.empty())
return;
LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
@@ -386,9 +375,9 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
for (auto *DAI : at::getAssignmentMarkers(OldAlloca))
BaseFragments[getAggregateVariable(DAI)] =
DAI->getExpression()->getFragmentInfo();
- for (auto *DPV : at::getDPVAssignmentMarkers(OldAlloca))
- BaseFragments[getAggregateVariable(DPV)] =
- DPV->getExpression()->getFragmentInfo();
+ for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
+ BaseFragments[getAggregateVariable(DVR)] =
+ DVR->getExpression()->getFragmentInfo();
// The new inst needs a DIAssignID unique metadata tag (if OldInst has
// one). It shouldn't already have one: assert this assumption.
@@ -398,7 +387,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
assert(OldAlloca->isStaticAlloca());
- auto MigrateDbgAssign = [&](auto DbgAssign) {
+ auto MigrateDbgAssign = [&](auto *DbgAssign) {
LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
<< "\n");
auto *Expr = DbgAssign->getExpression();
@@ -452,10 +441,12 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
}
::Value *NewValue = Value ? Value : DbgAssign->getValue();
- auto *NewAssign = createLinkedAssign(
- DbgAssign, DIB, Inst, NewValue, DbgAssign->getVariable(), Expr, Dest,
- DIExpression::get(Expr->getContext(), std::nullopt),
- DbgAssign->getDebugLoc());
+ auto *NewAssign = UnwrapDbgInstPtr(
+ DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
+ Dest,
+ DIExpression::get(Expr->getContext(), std::nullopt),
+ DbgAssign->getDebugLoc()),
+ DbgAssign);
// If we've updated the value but the original dbg.assign has an arglist
// then kill it now - we can't use the requested new value.
@@ -493,7 +484,7 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
};
for_each(MarkerRange, MigrateDbgAssign);
- for_each(DPVAssignMarkerRange, MigrateDbgAssign);
+ for_each(DVRAssignMarkerRange, MigrateDbgAssign);
}
namespace {
@@ -510,9 +501,9 @@ class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
public:
void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ void InsertHelper(Instruction *I, const Twine &Name,
BasicBlock::iterator InsertPt) const override {
- IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
+ IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
InsertPt);
}
};
@@ -635,7 +626,7 @@ public:
int OldSize = Slices.size();
Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
- llvm::sort(SliceI, Slices.end());
+ std::stable_sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
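The hunk above switches the freshly appended tail to std::stable_sort before std::inplace_merge folds it into the already-sorted prefix. A minimal standalone sketch of that idiom, using a plain std::vector<int> and a hypothetical helper name rather than the LLVM slice types:

#include <algorithm>
#include <vector>

// Append NewItems to an already-sorted Items while keeping Items sorted:
// sort only the appended tail, then merge the two sorted runs in place.
void appendSorted(std::vector<int> &Items, const std::vector<int> &NewItems) {
  std::size_t OldSize = Items.size();
  Items.insert(Items.end(), NewItems.begin(), NewItems.end());
  auto Tail = Items.begin() + OldSize;
  std::stable_sort(Tail, Items.end());                   // sort the new tail only
  std::inplace_merge(Items.begin(), Tail, Items.end());  // merge the two runs
}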
@@ -1100,45 +1091,6 @@ private:
if (GEPI.use_empty())
return markAsDead(GEPI);
- if (SROAStrictInbounds && GEPI.isInBounds()) {
- // FIXME: This is a manually un-factored variant of the basic code inside
- // of GEPs with checking of the inbounds invariant specified in the
- // langref in a very strict sense. If we ever want to enable
- // SROAStrictInbounds, this code should be factored cleanly into
- // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
- // by writing out the code here where we have the underlying allocation
- // size readily available.
- APInt GEPOffset = Offset;
- const DataLayout &DL = GEPI.getModule()->getDataLayout();
- for (gep_type_iterator GTI = gep_type_begin(GEPI),
- GTE = gep_type_end(GEPI);
- GTI != GTE; ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
- if (!OpC)
- break;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = GTI.getStructTypeOrNull()) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL.getStructLayout(STy);
- GEPOffset +=
- APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
- } else {
- // For array or vector indices, scale the index by the size of the
- // type.
- APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
- GEPOffset += Index * APInt(Offset.getBitWidth(),
- GTI.getSequentialElementStride(DL));
- }
-
- // If this index has computed an intermediate pointer which is not
- // inbounds, then the result of the GEP is a poison value and we can
- // delete it and all uses.
- if (GEPOffset.ugt(AllocSize))
- return markAsDead(GEPI);
- }
- }
-
return Base::visitGetElementPtrInst(GEPI);
}
@@ -1213,8 +1165,9 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
- insertUse(II, Offset, Length ? Length->getLimitedValue()
- : AllocSize - Offset.getLimitedValue(),
+ insertUse(II, Offset,
+ Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue(),
(bool)Length);
}
@@ -1327,7 +1280,7 @@ private:
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
- const DataLayout &DL = Root->getModule()->getDataLayout();
+ const DataLayout &DL = Root->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
@@ -1574,7 +1527,7 @@ findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN) {
- const DataLayout &DL = PN.getModule()->getDataLayout();
+ const DataLayout &DL = PN.getDataLayout();
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
@@ -1669,7 +1622,7 @@ static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
}
// Inject loads into all of the pred blocks.
- DenseMap<BasicBlock*, Value*> InjectedLoads;
+ DenseMap<BasicBlock *, Value *> InjectedLoads;
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
BasicBlock *Pred = PN.getIncomingBlock(Idx);
Value *InVal = PN.getIncomingValue(Idx);
@@ -1678,7 +1631,7 @@ static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
// basic block, as long as the value is the same. So if we already injected
// a load in the predecessor, then we should reuse the same load for all
// duplicated entries.
- if (Value* V = InjectedLoads.lookup(Pred)) {
+ if (Value *V = InjectedLoads.lookup(Pred)) {
NewPN->addIncoming(V, Pred);
continue;
}
@@ -1732,7 +1685,7 @@ isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
assert(LI.isSimple() && "Only for simple loads");
SelectHandSpeculativity Spec;
- const DataLayout &DL = SI.getModule()->getDataLayout();
+ const DataLayout &DL = SI.getDataLayout();
for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
&LI))
@@ -1852,7 +1805,7 @@ static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
Tail->setName(Head->getName() + ".cont");
PHINode *PN;
if (isa<LoadInst>(I))
- PN = PHINode::Create(I.getType(), 2, "", &I);
+ PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
for (BasicBlock *SuccBB : successors(Head)) {
bool IsThen = SuccBB == HeadBI->getSuccessor(0);
int SuccIdx = IsThen ? 0 : 1;
@@ -2077,8 +2030,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (BeginIndex * ElementSize != BeginOffset ||
BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
return false;
- uint64_t EndOffset =
- std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
+ uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
uint64_t EndIndex = EndOffset / ElementSize;
if (EndIndex * ElementSize != EndOffset ||
EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
@@ -2226,8 +2178,7 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
cast<FixedVectorType>(LHSTy)->getNumElements();
};
llvm::sort(CandidateTys, RankVectorTypesComp);
- CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
- RankVectorTypesEq),
+ CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
CandidateTys.end());
} else {
// The only way to have the same element type in every vector type is to
@@ -2780,8 +2731,8 @@ public:
Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
IRB.SetInsertPoint(OldUserI);
IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
- IRB.getInserter().SetNamePrefix(
- Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
+ IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
+ Twine(BeginOffset) + ".");
CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
if (VecTy || IntTy)
@@ -2834,7 +2785,7 @@ private:
#else
Twine()
#endif
- );
+ );
}
/// Compute suitable alignment to access this slice of the *new*
@@ -2940,7 +2891,8 @@ private:
// Do this after copyMetadataForLoad() to preserve the TBAA shift.
if (AATags)
- NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewLI->setAAMetadata(AATags.adjustForAccess(
+ NewBeginOffset - BeginOffset, NewLI->getType(), DL));
// Try to preserve nonnull metadata
V = NewLI;
@@ -2961,8 +2913,11 @@ private:
LoadInst *NewLI =
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
+
if (AATags)
- NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewLI->setAAMetadata(AATags.adjustForAccess(
+ NewBeginOffset - BeginOffset, NewLI->getType(), DL));
+
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
@@ -2982,7 +2937,12 @@ private:
assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
- IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
+ BasicBlock::iterator LIIt = std::next(LI.getIterator());
+ // Ensure the insertion point comes before any debug-info immediately
+ // after the load, so that variable values referring to the load are
+ // dominated by it.
+ LIIt.setHeadBit(true);
+ IRB.SetInsertPoint(LI.getParent(), LIIt);
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
@@ -3032,7 +2992,8 @@ private:
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
Pass.DeadInsts.push_back(&SI);
// NOTE: Careful to use OrigV rather than V.
@@ -3059,7 +3020,8 @@ private:
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
Store, Store->getPointerOperand(),
@@ -3119,7 +3081,8 @@ private:
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
@@ -3188,7 +3151,7 @@ private:
// emit dbg.assign intrinsics for mem intrinsics storing through non-
// constant geps, or storing a variable number of bytes.
assert(at::getAssignmentMarkers(&II).empty() &&
- at::getDPVAssignmentMarkers(&II).empty() &&
+ at::getDVRAssignmentMarkers(&II).empty() &&
"AT: Unexpected link to non-const GEP");
deleteIfTriviallyDead(OldPtr);
return false;
@@ -3203,8 +3166,7 @@ private:
const bool CanContinue = [&]() {
if (VecTy || IntTy)
return true;
- if (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)
+ if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
return false;
// Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
@@ -3221,12 +3183,14 @@ private:
// a single value type, just emit a memset.
if (!CanContinue) {
Type *SizeTy = II.getLength()->getType();
- Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
+ unsigned Sz = NewEndOffset - NewBeginOffset;
+ Constant *Size = ConstantInt::get(SizeTy, Sz);
MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile()));
if (AATags)
- New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ New->setAAMetadata(
+ AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
New, New->getRawDest(), nullptr, DL);
@@ -3302,7 +3266,8 @@ private:
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ V->getType(), DL));
migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
New, New->getPointerOperand(), V, DL);
@@ -3341,7 +3306,7 @@ private:
DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
};
for_each(at::getAssignmentMarkers(&II), UpdateAssignAddress);
- for_each(at::getDPVAssignmentMarkers(&II), UpdateAssignAddress);
+ for_each(at::getDVRAssignmentMarkers(&II), UpdateAssignAddress);
II.setDest(AdjustedPtr);
II.setDestAlignment(SliceAlign);
} else {
@@ -3507,7 +3472,8 @@ private:
Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ Load->getType(), DL));
Src = Load;
}
@@ -3529,7 +3495,8 @@ private:
Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
- Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
+ Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
+ Src->getType(), DL));
APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
if (IsDest) {
@@ -3857,7 +3824,8 @@ private:
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
- Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+ Load->setAAMetadata(
+ AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
@@ -3908,8 +3876,10 @@ private:
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
- if (AATags)
- Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
+ if (AATags) {
+ Store->setAAMetadata(AATags.adjustForAccess(
+ Offset.getZExtValue(), ExtractValue->getType(), DL));
+ }
// migrateDebugInfo requires the base Alloca. Walk to it from this gep.
// If we cannot (because there's an intervening non-const or unbounded
@@ -3925,7 +3895,7 @@ private:
DL);
} else {
assert(at::getAssignmentMarkers(Store).empty() &&
- at::getDPVAssignmentMarkers(Store).empty() &&
+ at::getDVRAssignmentMarkers(Store).empty() &&
"AT: unexpected debug.assign linked to store through "
"unbounded GEP");
}
@@ -3963,30 +3933,62 @@ private:
return false;
}
- // Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2)
- bool foldGEPSelect(GetElementPtrInst &GEPI) {
- if (!GEPI.hasAllConstantIndices())
+ // Unfold gep (select cond, ptr1, ptr2), idx
+ // => select cond, gep(ptr1, idx), gep(ptr2, idx)
+ // and gep ptr, (select cond, idx1, idx2)
+ // => select cond, gep(ptr, idx1), gep(ptr, idx2)
+ bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
+ // Check whether the GEP has exactly one select operand and all indices
+ // will become constant after the transform.
+ SelectInst *Sel = dyn_cast<SelectInst>(GEPI.getPointerOperand());
+ for (Value *Op : GEPI.indices()) {
+ if (auto *SI = dyn_cast<SelectInst>(Op)) {
+ if (Sel)
+ return false;
+
+ Sel = SI;
+ if (!isa<ConstantInt>(Sel->getTrueValue()) ||
+ !isa<ConstantInt>(Sel->getFalseValue()))
+ return false;
+ continue;
+ }
+
+ if (!isa<ConstantInt>(Op))
+ return false;
+ }
+
+ if (!Sel)
return false;
- SelectInst *Sel = cast<SelectInst>(GEPI.getPointerOperand());
+ LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
+ dbgs() << " original: " << *Sel << "\n";
+ dbgs() << " " << GEPI << "\n";);
+
+ auto GetNewOps = [&](Value *SelOp) {
+ SmallVector<Value *> NewOps;
+ for (Value *Op : GEPI.operands())
+ if (Op == Sel)
+ NewOps.push_back(SelOp);
+ else
+ NewOps.push_back(Op);
+ return NewOps;
+ };
- LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):"
- << "\n original: " << *Sel
- << "\n " << GEPI);
+ Value *True = Sel->getTrueValue();
+ Value *False = Sel->getFalseValue();
+ SmallVector<Value *> TrueOps = GetNewOps(True);
+ SmallVector<Value *> FalseOps = GetNewOps(False);
IRB.SetInsertPoint(&GEPI);
- SmallVector<Value *, 4> Index(GEPI.indices());
- bool IsInBounds = GEPI.isInBounds();
+ GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
Type *Ty = GEPI.getSourceElementType();
- Value *True = Sel->getTrueValue();
- Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep",
- IsInBounds);
-
- Value *False = Sel->getFalseValue();
+ Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
+ True->getName() + ".sroa.gep", NW);
- Value *NFalse = IRB.CreateGEP(Ty, False, Index,
- False->getName() + ".sroa.gep", IsInBounds);
+ Value *NFalse =
+ IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
+ False->getName() + ".sroa.gep", NW);
Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
Sel->getName() + ".sroa.sel");
@@ -3997,75 +3999,114 @@ private:
Visited.insert(NSelI);
enqueueUsers(*NSelI);
- LLVM_DEBUG(dbgs() << "\n to: " << *NTrue
- << "\n " << *NFalse
- << "\n " << *NSel << '\n');
+ LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
+ dbgs() << " " << *NFalse << "\n";
+ dbgs() << " " << *NSel << "\n";);
return true;
}
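In source-level terms, the select unfolding above rewrites an address computed from a selected base (or a selected constant index) into a select of two fully formed addresses. A rough C++ analogy of the equivalence, with hypothetical function names and a fixed offset standing in for the GEP indices:

// Before the rewrite: index a pointer chosen by a select.
float *folded(bool Cond, float *A, float *B) {
  float *P = Cond ? A : B;   // select of pointers
  return P + 4;              // single GEP on the selected pointer
}

// After the rewrite: duplicate the GEP into each arm so SROA can see a
// constant offset from each underlying allocation.
float *unfolded(bool Cond, float *A, float *B) {
  return Cond ? (A + 4) : (B + 4);
}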
- // Fold gep (phi ptr1, ptr2) => phi gep(ptr1), gep(ptr2)
- bool foldGEPPhi(GetElementPtrInst &GEPI) {
- if (!GEPI.hasAllConstantIndices())
- return false;
+ // Unfold gep (phi ptr1, ptr2), idx
+ // => phi ((gep ptr1, idx), (gep ptr2, idx))
+ // and gep ptr, (phi idx1, idx2)
+ // => phi ((gep ptr, idx1), (gep ptr, idx2))
+ bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
+ // To prevent infinitely expanding recursive phis, bail if the GEP pointer
+ // operand (looking through the phi if it is the phi we want to unfold) is
+ // an instruction besides a static alloca.
+ PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
+ auto IsInvalidPointerOperand = [](Value *V) {
+ if (!isa<Instruction>(V))
+ return false;
+ if (auto *AI = dyn_cast<AllocaInst>(V))
+ return !AI->isStaticAlloca();
+ return true;
+ };
+ if (Phi) {
+ if (any_of(Phi->operands(), IsInvalidPointerOperand))
+ return false;
+ } else {
+ if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
+ return false;
+ }
+ // Check whether the GEP has exactly one phi operand (including the pointer
+ // operand) and all indices will become constant after the transform.
+ for (Value *Op : GEPI.indices()) {
+ if (auto *SI = dyn_cast<PHINode>(Op)) {
+ if (Phi)
+ return false;
+
+ Phi = SI;
+ if (!all_of(Phi->incoming_values(),
+ [](Value *V) { return isa<ConstantInt>(V); }))
+ return false;
+ continue;
+ }
+
+ if (!isa<ConstantInt>(Op))
+ return false;
+ }
- PHINode *PHI = cast<PHINode>(GEPI.getPointerOperand());
- if (GEPI.getParent() != PHI->getParent() ||
- llvm::any_of(PHI->incoming_values(), [](Value *In)
- { Instruction *I = dyn_cast<Instruction>(In);
- return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
- succ_empty(I->getParent()) ||
- !I->getParent()->isLegalToHoistInto();
- }))
+ if (!Phi)
return false;
- LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):"
- << "\n original: " << *PHI
- << "\n " << GEPI
- << "\n to: ");
-
- SmallVector<Value *, 4> Index(GEPI.indices());
- bool IsInBounds = GEPI.isInBounds();
- IRB.SetInsertPoint(GEPI.getParent(), GEPI.getParent()->getFirstNonPHIIt());
- PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
- PHI->getName() + ".sroa.phi");
- for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
- BasicBlock *B = PHI->getIncomingBlock(I);
- Value *NewVal = nullptr;
- int Idx = NewPN->getBasicBlockIndex(B);
- if (Idx >= 0) {
- NewVal = NewPN->getIncomingValue(Idx);
- } else {
- Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
+ LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
+ dbgs() << " original: " << *Phi << "\n";
+ dbgs() << " " << GEPI << "\n";);
+
+ auto GetNewOps = [&](Value *PhiOp) {
+ SmallVector<Value *> NewOps;
+ for (Value *Op : GEPI.operands())
+ if (Op == Phi)
+ NewOps.push_back(PhiOp);
+ else
+ NewOps.push_back(Op);
+ return NewOps;
+ };
- IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
- Type *Ty = GEPI.getSourceElementType();
- NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
- IsInBounds);
+ IRB.SetInsertPoint(Phi);
+ PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
+ Phi->getName() + ".sroa.phi");
+
+ Type *SourceTy = GEPI.getSourceElementType();
+ // We only handle arguments, constants, and static allocas here, so we can
+ // insert GEPs at the end of the entry block.
+ IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ Value *Op = Phi->getIncomingValue(I);
+ BasicBlock *BB = Phi->getIncomingBlock(I);
+ Value *NewGEP;
+ if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
+ NewGEP = NewPhi->getIncomingValue(NI);
+ } else {
+ SmallVector<Value *> NewOps = GetNewOps(Op);
+ NewGEP =
+ IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
+ Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
}
- NewPN->addIncoming(NewVal, B);
+ NewPhi->addIncoming(NewGEP, BB);
}
Visited.erase(&GEPI);
- GEPI.replaceAllUsesWith(NewPN);
+ GEPI.replaceAllUsesWith(NewPhi);
GEPI.eraseFromParent();
- Visited.insert(NewPN);
- enqueueUsers(*NewPN);
+ Visited.insert(NewPhi);
+ enqueueUsers(*NewPhi);
- LLVM_DEBUG(for (Value *In : NewPN->incoming_values())
- dbgs() << "\n " << *In;
- dbgs() << "\n " << *NewPN << '\n');
+ LLVM_DEBUG(dbgs() << " to: ";
+ for (Value *In
+ : NewPhi->incoming_values()) dbgs()
+ << "\n " << *In;
+ dbgs() << "\n " << *NewPhi << '\n');
return true;
}
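The phi case is the control-flow analogue: rather than indexing a pointer merged at a join point, each incoming value gets its own GEP (emitted in the entry block here, which is safe because only arguments, constants, and static allocas are accepted) and the phi merges the results. A hedged C++ sketch with made-up names:

// Before: the merged pointer is indexed after the join.
int before(bool Cond, int *A, int *B) {
  int *P = Cond ? A : B;   // stands in for a phi of two incoming pointers
  return P[3];             // one GEP applied to the merged pointer
}

// After: each incoming value gets its own offset computation and only the
// final addresses are merged, exposing per-allocation offsets to SROA.
int after(bool Cond, int *A, int *B) {
  int *GA = A + 3;
  int *GB = B + 3;
  int *P = Cond ? GA : GB;
  return *P;
}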
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- if (isa<SelectInst>(GEPI.getPointerOperand()) &&
- foldGEPSelect(GEPI))
+ if (unfoldGEPSelect(GEPI))
return true;
- if (isa<PHINode>(GEPI.getPointerOperand()) &&
- foldGEPPhi(GEPI))
+ if (unfoldGEPPhi(GEPI))
return true;
enqueueUsers(GEPI);
@@ -4137,17 +4178,17 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
return nullptr;
if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- Type *ElementTy;
- uint64_t TyNumElements;
- if (auto *AT = dyn_cast<ArrayType>(Ty)) {
- ElementTy = AT->getElementType();
- TyNumElements = AT->getNumElements();
- } else {
- // FIXME: This isn't right for vectors with non-byte-sized or
- // non-power-of-two sized elements.
- auto *VT = cast<FixedVectorType>(Ty);
- ElementTy = VT->getElementType();
- TyNumElements = VT->getNumElements();
+ Type *ElementTy;
+ uint64_t TyNumElements;
+ if (auto *AT = dyn_cast<ArrayType>(Ty)) {
+ ElementTy = AT->getElementType();
+ TyNumElements = AT->getNumElements();
+ } else {
+ // FIXME: This isn't right for vectors with non-byte-sized or
+ // non-power-of-two sized elements.
+ auto *VT = cast<FixedVectorType>(Ty);
+ ElementTy = VT->getElementType();
+ TyNumElements = VT->getNumElements();
}
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
uint64_t NumSkippedElements = Offset / ElementSize;
@@ -4458,7 +4499,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
- const DataLayout &DL = AI.getModule()->getDataLayout();
+ const DataLayout &DL = AI.getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
@@ -4532,6 +4573,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
Value *StoreBasePtr = SI->getPointerOperand();
IRB.SetInsertPoint(SI);
+ AAMDNodes AATags = SI->getAAMetadata();
LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
@@ -4551,6 +4593,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group,
LLVMContext::MD_DIAssignID});
+
+ if (AATags)
+ PStore->setAAMetadata(
+ AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -4747,7 +4793,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
VectorType *SliceVecTy = nullptr;
- const DataLayout &DL = AI.getModule()->getDataLayout();
+ const DataLayout &DL = AI.getDataLayout();
std::pair<Type *, IntegerType *> CommonUseTy =
findCommonType(P.begin(), P.end(), P.endOffset());
// Do all uses operate on the same type?
@@ -4817,15 +4863,15 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
NewAI = new AllocaInst(
SliceTy, AI.getAddressSpace(), nullptr,
IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
- AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
+ AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
+ AI.getIterator());
// Copy the old AI debug location over to the new one.
NewAI->setDebugLoc(AI.getDebugLoc());
++NumNewAllocas;
}
- LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
- << "[" << P.beginOffset() << "," << P.endOffset()
- << ") to: " << *NewAI << "\n");
+ LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
+ << "," << P.endOffset() << ") to: " << *NewAI << "\n");
// Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
@@ -4921,45 +4967,236 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
return NewAI;
}
-static void insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig,
- AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
- Instruction *BeforeInst) {
- DIB.insertDeclare(NewAddr, Orig->getVariable(), NewFragmentExpr,
+// There isn't a shared interface to get the "address" parts out of a
+// dbg.declare and dbg.assign, so provide some wrappers now for
+// both debug intrinsics and records.
+const Value *getAddress(const DbgVariableIntrinsic *DVI) {
+ if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI))
+ return DAI->getAddress();
+ return cast<DbgDeclareInst>(DVI)->getAddress();
+}
+
+const Value *getAddress(const DbgVariableRecord *DVR) {
+ assert(DVR->getType() == DbgVariableRecord::LocationType::Declare ||
+ DVR->getType() == DbgVariableRecord::LocationType::Assign);
+ return DVR->getAddress();
+}
+
+bool isKillAddress(const DbgVariableIntrinsic *DVI) {
+ if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI))
+ return DAI->isKillAddress();
+ return cast<DbgDeclareInst>(DVI)->isKillLocation();
+}
+
+bool isKillAddress(const DbgVariableRecord *DVR) {
+ assert(DVR->getType() == DbgVariableRecord::LocationType::Declare ||
+ DVR->getType() == DbgVariableRecord::LocationType::Assign);
+ if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
+ return DVR->isKillAddress();
+ return DVR->isKillLocation();
+}
+
+const DIExpression *getAddressExpression(const DbgVariableIntrinsic *DVI) {
+ if (const auto *DAI = dyn_cast<DbgAssignIntrinsic>(DVI))
+ return DAI->getAddressExpression();
+ return cast<DbgDeclareInst>(DVI)->getExpression();
+}
+
+const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) {
+ assert(DVR->getType() == DbgVariableRecord::LocationType::Declare ||
+ DVR->getType() == DbgVariableRecord::LocationType::Assign);
+ if (DVR->getType() == DbgVariableRecord::LocationType::Assign)
+ return DVR->getAddressExpression();
+ return DVR->getExpression();
+}
+
+/// Create or replace an existing fragment in a DIExpression with \p Frag.
+/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
+/// operation, add \p BitExtractOffset to the offset part.
+///
+/// Returns the new expression, or nullptr if this fails (see details below).
+///
+/// This function is similar to DIExpression::createFragmentExpression except
+/// for 3 important distinctions:
+/// 1. The new fragment isn't relative to an existing fragment.
+/// 2. It assumes the computed location is a memory location. This means we
+/// don't need to perform checks that creating the fragment preserves the
+/// expression semantics.
+/// 3. Existing extract_bits are modified independently of fragment changes
+/// using \p BitExtractOffset. A change to the fragment offset or size
+/// may affect a bit extract. But a bit extract offset can change
+/// independently of the fragment dimensions.
+///
+/// Returns the new expression, or nullptr if one couldn't be created.
+/// Ideally this is only used to signal that a bit-extract has become
+/// zero-sized (and thus the new debug record has no size and can be
+/// dropped), however, it fails for other reasons too - see the FIXME below.
+///
+/// FIXME: To keep the change that introduces this function NFC, it bails
+/// in some situations unnecessarily, e.g. when fragment and bit extract
+/// sizes differ.
+static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
+ DIExpression::FragmentInfo Frag,
+ int64_t BitExtractOffset) {
+ SmallVector<uint64_t, 8> Ops;
+ bool HasFragment = false;
+ bool HasBitExtract = false;
+
+ for (auto &Op : Expr->expr_ops()) {
+ if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
+ HasFragment = true;
+ continue;
+ }
+ if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
+ Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
+ HasBitExtract = true;
+ int64_t ExtractOffsetInBits = Op.getArg(0);
+ int64_t ExtractSizeInBits = Op.getArg(1);
+
+ // DIExpression::createFragmentExpression doesn't know how to handle
+ // a fragment that is smaller than the extract. Copy the behaviour
+ // (bail) to avoid non-NFC changes.
+ // FIXME: Don't do this.
+ if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
+ return nullptr;
+
+ assert(BitExtractOffset <= 0);
+ int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
+
+ // DIExpression::createFragmentExpression doesn't know what to do
+ // if the new extract starts "outside" the existing one. Copy the
+ // behaviour (bail) to avoid non-NFC changes.
+ // FIXME: Don't do this.
+ if (AdjustedOffset < 0)
+ return nullptr;
+
+ Ops.push_back(Op.getOp());
+ Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
+ Ops.push_back(ExtractSizeInBits);
+ continue;
+ }
+ Op.appendToVector(Ops);
+ }
+
+ // Unsupported by createFragmentExpression, so don't support it here yet to
+ // preserve NFC-ness.
+ if (HasFragment && HasBitExtract)
+ return nullptr;
+
+ if (!HasBitExtract) {
+ Ops.push_back(dwarf::DW_OP_LLVM_fragment);
+ Ops.push_back(Frag.OffsetInBits);
+ Ops.push_back(Frag.SizeInBits);
+ }
+ return DIExpression::get(Expr->getContext(), Ops);
+}
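As a simplified model of what createOrReplaceFragment produces (the enum values, struct, and helper below are stand-ins, not the LLVM DIExpression API): any existing fragment op is dropped, an existing extract_bits op keeps its size but has the adjustment folded into its offset, and a fresh fragment op describing the new slice is appended only when no bit extract is present.

#include <cstdint>
#include <optional>
#include <vector>

// Stand-in opcodes; the real values live in llvm/BinaryFormat/Dwarf.h.
enum : uint64_t { OpFragment = 1, OpExtractBits = 2, OpOther = 3 };

struct ExprOp { uint64_t Op; int64_t Arg0 = 0; int64_t Arg1 = 0; };

// Sketch of the rebuild; returns std::nullopt where the real code bails.
std::optional<std::vector<ExprOp>>
rebuild(const std::vector<ExprOp> &Expr, uint64_t FragOffset,
        uint64_t FragSize, int64_t BitExtractAdjust) {
  std::vector<ExprOp> Out;
  bool HadFragment = false, HadExtract = false;
  for (const ExprOp &Op : Expr) {
    if (Op.Op == OpFragment) { HadFragment = true; continue; } // drop old one
    if (Op.Op == OpExtractBits) {
      HadExtract = true;
      int64_t NewOffset = Op.Arg0 + BitExtractAdjust;          // shift the extract
      if (FragSize < uint64_t(Op.Arg1) || NewOffset < 0)
        return std::nullopt;                                   // mirrored bail-outs
      Out.push_back({OpExtractBits, NewOffset, Op.Arg1});
      continue;
    }
    Out.push_back(Op);
  }
  if (HadFragment && HadExtract)
    return std::nullopt;                                       // unsupported combo
  if (!HadExtract)
    Out.push_back({OpFragment, (int64_t)FragOffset, (int64_t)FragSize});
  return Out;
}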
+
+/// Insert a new dbg.declare.
+/// \p Orig Original to copy debug loc and variable from.
+/// \p NewAddr Location's new base address.
+/// \p NewAddrExpr New expression to apply to address.
+/// \p BeforeInst Insert position.
+/// \p NewFragment New fragment (absolute, non-relative).
+/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
+static void
+insertNewDbgInst(DIBuilder &DIB, DbgDeclareInst *Orig, AllocaInst *NewAddr,
+ DIExpression *NewAddrExpr, Instruction *BeforeInst,
+ std::optional<DIExpression::FragmentInfo> NewFragment,
+ int64_t BitExtractAdjustment) {
+ if (NewFragment)
+ NewAddrExpr = createOrReplaceFragment(NewAddrExpr, *NewFragment,
+ BitExtractAdjustment);
+ if (!NewAddrExpr)
+ return;
+
+ DIB.insertDeclare(NewAddr, Orig->getVariable(), NewAddrExpr,
Orig->getDebugLoc(), BeforeInst);
}
-static void insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig,
- AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
- Instruction *BeforeInst) {
+
+/// Insert a new dbg.assign.
+/// \p Orig Original to copy debug loc, variable, value and value expression
+/// from.
+/// \p NewAddr Location's new base address.
+/// \p NewAddrExpr New expression to apply to address.
+/// \p BeforeInst Insert position.
+/// \p NewFragment New fragment (absolute, non-relative).
+/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
+static void
+insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig, AllocaInst *NewAddr,
+ DIExpression *NewAddrExpr, Instruction *BeforeInst,
+ std::optional<DIExpression::FragmentInfo> NewFragment,
+ int64_t BitExtractAdjustment) {
+ // DIBuilder::insertDbgAssign will insert the #dbg_assign after NewAddr.
(void)BeforeInst;
+
+ // A dbg.assign puts fragment info in the value expression only. The address
+ // expression has already been built: NewAddrExpr.
+ DIExpression *NewFragmentExpr = Orig->getExpression();
+ if (NewFragment)
+ NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
+ BitExtractAdjustment);
+ if (!NewFragmentExpr)
+ return;
+
+ // Apply a DIAssignID to the store if it doesn't already have it.
if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
DIAssignID::getDistinct(NewAddr->getContext()));
}
- auto *NewAssign = DIB.insertDbgAssign(
- NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
- Orig->getAddressExpression(), Orig->getDebugLoc());
+
+ Instruction *NewAssign =
+ DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(),
+ NewFragmentExpr, NewAddr, NewAddrExpr,
+ Orig->getDebugLoc())
+ .get<Instruction *>();
LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
(void)NewAssign;
}
-static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr,
- DIExpression *NewFragmentExpr,
- Instruction *BeforeInst) {
+
+/// Insert a new DbgRecord.
+/// \p Orig Original to copy record type, debug loc and variable from, and
+/// additionally value and value expression for dbg_assign records.
+/// \p NewAddr Location's new base address.
+/// \p NewAddrExpr New expression to apply to address.
+/// \p BeforeInst Insert position.
+/// \p NewFragment New fragment (absolute, non-relative).
+/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
+static void
+insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
+ DIExpression *NewAddrExpr, Instruction *BeforeInst,
+ std::optional<DIExpression::FragmentInfo> NewFragment,
+ int64_t BitExtractAdjustment) {
(void)DIB;
+
+ // A dbg_assign puts fragment info in the value expression only. The address
+ // expression has already been built: NewAddrExpr. A dbg_declare puts the
+ // new fragment info into NewAddrExpr (as it only has one expression).
+ DIExpression *NewFragmentExpr =
+ Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
+ if (NewFragment)
+ NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
+ BitExtractAdjustment);
+ if (!NewFragmentExpr)
+ return;
+
if (Orig->isDbgDeclare()) {
- DPValue *DPV = DPValue::createDPVDeclare(
+ DbgVariableRecord *DVR = DbgVariableRecord::createDVRDeclare(
NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
- BeforeInst->getParent()->insertDPValueBefore(DPV,
- BeforeInst->getIterator());
+ BeforeInst->getParent()->insertDbgRecordBefore(DVR,
+ BeforeInst->getIterator());
return;
}
+
+ // Apply a DIAssignID to the store if it doesn't already have it.
if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
DIAssignID::getDistinct(NewAddr->getContext()));
}
- auto *NewAssign = DPValue::createLinkedDPVAssign(
+
+ DbgVariableRecord *NewAssign = DbgVariableRecord::createLinkedDVRAssign(
NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
- Orig->getAddressExpression(), Orig->getDebugLoc());
- LLVM_DEBUG(dbgs() << "Created new DPVAssign: " << *NewAssign << "\n");
+ NewAddrExpr, Orig->getDebugLoc());
+ LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
(void)NewAssign;
}
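A concrete, purely illustrative instance of the expression-placement rule in the three insertNewDbgInst overloads: if the new slice covers bits [64, 128) of the variable, a dbg_declare carries both the address offset and the fragment in its single expression, while a dbg_assign keeps the fragment in its value expression and the offset in its separate address expression. Sketched as data with hypothetical op strings, not the real API:

#include <string>
#include <vector>

// Simplified stand-ins for the two record shapes (not the LLVM classes).
struct DeclareRecord {
  std::vector<std::string> Expr;        // address ops + fragment live together
};
struct AssignRecord {
  std::vector<std::string> ValueExpr;   // fragment goes here
  std::vector<std::string> AddressExpr; // offset ops only, no fragment
};

// Hypothetical slice: bits [64, 128) of the variable, 4 bytes into the base.
DeclareRecord makeDeclare() {
  return {{"DW_OP_plus_uconst 4", "DW_OP_LLVM_fragment 64 64"}};
}
AssignRecord makeAssign() {
  return {{"DW_OP_LLVM_fragment 64 64"}, {"DW_OP_plus_uconst 4"}};
}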
@@ -5010,8 +5247,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
IsSorted = false;
}
}
- }
- else {
+ } else {
// We only allow whole-alloca splittable loads and stores
// for a large alloca to avoid creating too large BitVector.
for (Slice &S : AS) {
@@ -5030,7 +5266,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
if (!IsSorted)
- llvm::sort(AS);
+ llvm::stable_sort(AS);
/// Describes the allocas introduced by rewritePartition in order to migrate
/// the debug info.
@@ -5039,7 +5275,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
uint64_t Offset;
uint64_t Size;
Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
- : Alloca(AI), Offset(O), Size(S) {}
+ : Alloca(AI), Offset(O), Size(S) {}
};
SmallVector<Fragment, 4> Fragments;
@@ -5053,7 +5289,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
// Don't include any padding.
uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
- Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
+ Fragments.push_back(
+ Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
}
}
++NumPartitions;
@@ -5065,54 +5302,78 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
auto MigrateOne = [&](auto *DbgVariable) {
- auto *Expr = DbgVariable->getExpression();
- DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
- uint64_t AllocaSize =
- DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedValue();
- for (auto Fragment : Fragments) {
- // Create a fragment expression describing the new partition or reuse AI's
- // expression if there is only one partition.
- auto *FragmentExpr = Expr;
- if (Fragment.Size < AllocaSize || Expr->isFragment()) {
- // If this alloca is already a scalar replacement of a larger aggregate,
- // Fragment.Offset describes the offset inside the scalar.
- auto ExprFragment = Expr->getFragmentInfo();
- uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
- uint64_t Start = Offset + Fragment.Offset;
- uint64_t Size = Fragment.Size;
- if (ExprFragment) {
- uint64_t AbsEnd =
- ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
- if (Start >= AbsEnd) {
- // No need to describe a SROAed padding.
- continue;
- }
- Size = std::min(Size, AbsEnd - Start);
- }
- // The new, smaller fragment is stenciled out from the old fragment.
- if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
- assert(Start >= OrigFragment->OffsetInBits &&
- "new fragment is outside of original fragment");
- Start -= OrigFragment->OffsetInBits;
- }
+ // Can't overlap with undef memory.
+ if (isKillAddress(DbgVariable))
+ return;
- // The alloca may be larger than the variable.
- auto VarSize = DbgVariable->getVariable()->getSizeInBits();
- if (VarSize) {
- if (Size > *VarSize)
- Size = *VarSize;
- if (Size == 0 || Start + Size > *VarSize)
- continue;
- }
+ const Value *DbgPtr = getAddress(DbgVariable);
+ DIExpression::FragmentInfo VarFrag =
+ DbgVariable->getFragmentOrEntireVariable();
+ // Get the address expression constant offset if one exists and the ops
+ // that come after it.
+ int64_t CurrentExprOffsetInBytes = 0;
+ SmallVector<uint64_t> PostOffsetOps;
+ if (!getAddressExpression(DbgVariable)
+ ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
+ return; // Couldn't interpret this DIExpression - drop the var.
+
+ // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
+ int64_t ExtractOffsetInBits = 0;
+ for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
+ if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
+ Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
+ ExtractOffsetInBits = Op.getArg(0);
+ break;
+ }
+ }
- // Avoid creating a fragment expression that covers the entire variable.
- if (!VarSize || *VarSize != Size) {
- if (auto E =
- DIExpression::createFragmentExpression(Expr, Start, Size))
- FragmentExpr = *E;
- else
- continue;
- }
+ DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
+ for (auto Fragment : Fragments) {
+ int64_t OffsetFromLocationInBits;
+ std::optional<DIExpression::FragmentInfo> NewDbgFragment;
+ // Find the variable fragment that the new alloca slice covers.
+ // Drop debug info for this variable fragment if we can't compute an
+ // intersect between it and the alloca slice.
+ if (!DIExpression::calculateFragmentIntersect(
+ DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
+ CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
+ NewDbgFragment, OffsetFromLocationInBits))
+ continue; // Do not migrate this fragment to this slice.
+
+ // Zero sized fragment indicates there's no intersect between the variable
+ // fragment and the alloca slice. Skip this slice for this variable
+ // fragment.
+ if (NewDbgFragment && !NewDbgFragment->SizeInBits)
+ continue; // Do not migrate this fragment to this slice.
+
+ // No fragment indicates DbgVariable's variable or fragment exactly
+ // overlaps the slice; copy its fragment (or nullopt if there isn't one).
+ if (!NewDbgFragment)
+ NewDbgFragment = DbgVariable->getFragment();
+
+ // Reduce the new expression offset by the bit-extract offset since
+ // we'll be keeping that.
+ int64_t OffestFromNewAllocaInBits =
+ OffsetFromLocationInBits - ExtractOffsetInBits;
+ // We need to adjust an existing bit extract if the offset expression
+ // can't eat the slack (i.e., if the new offset would be negative).
+ int64_t BitExtractOffset =
+ std::min<int64_t>(0, OffestFromNewAllocaInBits);
+ // The magnitude of a negative value indicates the number of bits into
+      // the existing variable fragment at which the memory region begins. The new
+ // variable fragment already excludes those bits - the new DbgPtr offset
+ // only needs to be applied if it's positive.
+ OffestFromNewAllocaInBits =
+ std::max(int64_t(0), OffestFromNewAllocaInBits);
+
+ // Rebuild the expression:
+ // {Offset(OffestFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
+ // Add NewDbgFragment later, because dbg.assigns don't want it in the
+ // address expression but the value expression instead.
+ DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
+ if (OffestFromNewAllocaInBits > 0) {
+ int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
+ NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
}
// Remove any existing intrinsics on the new alloca describing
@@ -5127,18 +5388,19 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
OldDII->eraseFromParent();
};
for_each(findDbgDeclares(Fragment.Alloca), RemoveOne);
- for_each(findDPVDeclares(Fragment.Alloca), RemoveOne);
+ for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
- insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, FragmentExpr, &AI);
+ insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
+ NewDbgFragment, BitExtractOffset);
}
};
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
for_each(findDbgDeclares(&AI), MigrateOne);
- for_each(findDPVDeclares(&AI), MigrateOne);
+ for_each(findDVRDeclares(&AI), MigrateOne);
for_each(at::getAssignmentMarkers(&AI), MigrateOne);
- for_each(at::getDPVAssignmentMarkers(&AI), MigrateOne);
+ for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
return Changed;
}
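To make the offset bookkeeping in MigrateOne concrete, here is a small worked computation with made-up numbers; the helper name and values are illustrative, but the clamping mirrors the min/max split done above (negative slack is absorbed by the bit extract, the positive remainder becomes a byte offset on the address expression, rounded up):

#include <algorithm>
#include <cassert>
#include <cstdint>

struct SplitOffsets {
  int64_t AddressOffsetBytes; // prepended to the new address expression
  int64_t BitExtractAdjust;   // folded into DW_OP_LLVM_extract_bits_*
};

SplitOffsets splitOffset(int64_t OffsetFromLocationInBits,
                         int64_t ExtractOffsetInBits) {
  int64_t FromNewAlloca = OffsetFromLocationInBits - ExtractOffsetInBits;
  int64_t BitExtractAdjust = std::min<int64_t>(0, FromNewAlloca); // negative slack
  FromNewAlloca = std::max<int64_t>(0, FromNewAlloca);            // positive part
  return {(FromNewAlloca + 7) / 8, BitExtractAdjust};             // bits -> bytes
}

int main() {
  // The slice sits 40 bits into the location; the old expression already
  // extracts starting at bit 32: one byte of address offset remains.
  SplitOffsets A = splitOffset(40, 32);
  assert(A.AddressOffsetBytes == 1 && A.BitExtractAdjust == 0);
  // If the extract starts beyond the slice, the extract absorbs the slack.
  SplitOffsets B = splitOffset(20, 32);
  assert(B.AddressOffsetBytes == 0 && B.BitExtractAdjust == -12);
  return 0;
}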
@@ -5177,7 +5439,7 @@ SROA::runOnAlloca(AllocaInst &AI) {
Changed = true;
return {Changed, CFGChanged};
}
- const DataLayout &DL = AI.getModule()->getDataLayout();
+ const DataLayout &DL = AI.getDataLayout();
// Skip alloca forms that this analysis can't handle.
auto *AT = AI.getAllocatedType();
@@ -5262,7 +5524,7 @@ bool SROA::deleteDeadInstructions(
DeletedAllocas.insert(AI);
for (DbgDeclareInst *OldDII : findDbgDeclares(AI))
OldDII->eraseFromParent();
- for (DPValue *OldDII : findDPVDeclares(AI))
+ for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
OldDII->eraseFromParent();
}
@@ -5309,7 +5571,7 @@ bool SROA::promoteAllocas(Function &F) {
std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
index 4ce6ce93be33..cb1456b14632 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -29,7 +29,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeInstSimplifyLegacyPassPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
- initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopUnrollPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
@@ -49,4 +48,5 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
initializePlaceBackedgeSafepointsLegacyPassPass(Registry);
+ initializePostInlineEntryExitInstrumenterPass(Registry);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index c01d03f64472..8eadf8900020 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -627,6 +627,7 @@ static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
Value *Ptr = CI->getArgOperand(0);
Value *Mask = CI->getArgOperand(1);
Value *PassThru = CI->getArgOperand(2);
+ Align Alignment = CI->getParamAlign(0).valueOrOne();
auto *VecType = cast<FixedVectorType>(CI->getType());
@@ -644,6 +645,10 @@ static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
// The result vector
Value *VResult = PassThru;
+ // Adjust alignment for the scalar instruction.
+ const Align AdjustedAlignment =
+ commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
+
// Shorten the way if the mask is a vector of constants.
// Create a build_vector pattern, with loads/poisons as necessary and then
// shuffle blend with the pass through value.
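The alignment adjustment added above derives the per-element alignment from the vector access's parameter alignment: each scalar load may only assume the largest power of two that divides both the original alignment and the element size, which is exactly what commonAlignment computes. A small sketch of that computation using plain integers rather than the LLVM Align type:

#include <cassert>
#include <cstdint>

// Largest power of two dividing both A and B, i.e. the lowest set bit of
// A | B - the same quantity commonAlignment(Align(A), B) yields here.
uint64_t commonAlign(uint64_t A, uint64_t B) {
  uint64_t X = A | B;
  return X & (~X + 1);
}

int main() {
  // <8 x i32> expand-load with the pointer parameter aligned to 16 bytes:
  // each scalar i32 load may still assume 4-byte alignment.
  assert(commonAlign(16, 4) == 4);
  // If the original access was only 1-byte aligned, so are the scalars.
  assert(commonAlign(1, 4) == 1);
  return 0;
}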
@@ -659,7 +664,7 @@ static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
} else {
Value *NewPtr =
Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
+ InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
"Load" + Twine(Idx));
ShuffleMask[Idx] = Idx;
++MemIndex;
@@ -713,7 +718,7 @@ static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
CondBlock->setName("cond.load");
Builder.SetInsertPoint(CondBlock->getTerminator());
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment);
Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
// Move the pointer if there are more blocks to come.
@@ -755,6 +760,7 @@ static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
+ Align Alignment = CI->getParamAlign(1).valueOrOne();
auto *VecType = cast<FixedVectorType>(Src->getType());
@@ -767,6 +773,10 @@ static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
Type *EltTy = VecType->getElementType();
+ // Adjust alignment for the scalar instruction.
+ const Align AdjustedAlignment =
+ commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
+
unsigned VectorWidth = VecType->getNumElements();
// Shorten the way if the mask is a vector of constants.
@@ -778,7 +788,7 @@ static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
Value *OneElt =
Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
+ Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
++MemIndex;
}
CI->eraseFromParent();
@@ -824,7 +834,7 @@ static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
Builder.SetInsertPoint(CondBlock->getTerminator());
Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
+ Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment);
// Move the pointer if there are more blocks to come.
Value *NewPtr;
@@ -852,6 +862,69 @@ static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
ModifiedDT = true;
}
+static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
+ DomTreeUpdater *DTU,
+ bool &ModifiedDT) {
+ // If we extend histogram to return a result someday (like the updated vector)
+ // then we'll need to support it here.
+ assert(CI->getType()->isVoidTy() && "Histogram with non-void return.");
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Inc = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+
+ auto *AddrType = cast<FixedVectorType>(Ptrs->getType());
+ Type *EltTy = Inc->getType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ Builder.SetInsertPoint(InsertPt);
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // FIXME: Do we need to add an alignment parameter to the intrinsic?
+ unsigned VectorWidth = AddrType->getNumElements();
+
+ // Shorten the way if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
+ Value *Add = Builder.CreateAdd(Load, Inc);
+ Builder.CreateStore(Add, Ptr);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+
+ Instruction *ThenTerm =
+ SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DTU);
+
+ BasicBlock *CondBlock = ThenTerm->getParent();
+ CondBlock->setName("cond.histogram.update");
+
+ Builder.SetInsertPoint(CondBlock->getTerminator());
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
+ Value *Add = Builder.CreateAdd(Load, Inc);
+ Builder.CreateStore(Add, Ptr);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
+ NewIfBlock->setName("else");
+ Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
+ }
+
+ CI->eraseFromParent();
+ ModifiedDT = true;
+}
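Semantically, the new scalarizeMaskedVectorHistogram expansion performs a per-lane read-modify-write guarded by the mask. A plain C++ model of the constant-mask fast path, with a hypothetical fixed lane count and bucket type:

#include <array>
#include <cstdint>

constexpr unsigned NumLanes = 4;

// For every enabled lane, load the bucket, add the increment, store it back.
// This is what the emitted scalar loads/adds/stores compute; the non-constant
// mask path does the same work behind per-lane conditional branches.
void histogramAdd(const std::array<uint32_t *, NumLanes> &Ptrs, uint32_t Inc,
                  const std::array<bool, NumLanes> &Mask) {
  for (unsigned Lane = 0; Lane < NumLanes; ++Lane) {
    if (!Mask[Lane])
      continue;
    *Ptrs[Lane] += Inc; // load, add, store
  }
}

Note that lanes aliasing the same bucket are applied one after another, so repeated pointers accumulate; a plain gather/add/scatter would lose those duplicate updates, which is why the expansion is written as a sequential per-lane loop.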
+
static bool runImpl(Function &F, const TargetTransformInfo &TTI,
DominatorTree *DT) {
std::optional<DomTreeUpdater> DTU;
@@ -860,7 +933,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
bool EverMadeChange = false;
bool MadeChange = true;
- auto &DL = F.getParent()->getDataLayout();
+ auto &DL = F.getDataLayout();
while (MadeChange) {
MadeChange = false;
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
@@ -928,6 +1001,12 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
switch (II->getIntrinsicID()) {
default:
break;
+ case Intrinsic::experimental_vector_histogram_add:
+ if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
+ CI->getArgOperand(1)->getType()))
+ return false;
+ scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
+ return true;
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
if (TTI.isLegalMaskedLoad(
@@ -969,12 +1048,16 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
return true;
}
case Intrinsic::masked_expandload:
- if (TTI.isLegalMaskedExpandLoad(CI->getType()))
+ if (TTI.isLegalMaskedExpandLoad(
+ CI->getType(),
+ CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne()))
return false;
scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
return true;
case Intrinsic::masked_compressstore:
- if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
+ if (TTI.isLegalMaskedCompressStore(
+ CI->getArgOperand(0)->getType(),
+ CI->getAttributes().getParamAttrs(1).getAlignment().valueOrOne()))
return false;
scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
return true;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3eca9ac7c267..2bed3480da1c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -523,8 +523,8 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
const ValueVector &CV) {
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
Op->getAllMetadataOtherThanDebugLoc(MDs);
- for (unsigned I = 0, E = CV.size(); I != E; ++I) {
- if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
+ for (Value *V : CV) {
+ if (Instruction *New = dyn_cast<Instruction>(V)) {
for (const auto &MD : MDs)
if (canTransferMetadata(MD.first))
New->setMetadata(MD.first, MD.second);
@@ -1107,7 +1107,7 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
return false;
std::optional<VectorLayout> Layout = getVectorLayout(
- LI.getType(), LI.getAlign(), LI.getModule()->getDataLayout());
+ LI.getType(), LI.getAlign(), LI.getDataLayout());
if (!Layout)
return false;
@@ -1133,7 +1133,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
Value *FullValue = SI.getValueOperand();
std::optional<VectorLayout> Layout = getVectorLayout(
- FullValue->getType(), SI.getAlign(), SI.getModule()->getDataLayout());
+ FullValue->getType(), SI.getAlign(), SI.getDataLayout());
if (!Layout)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 17c466f38c9c..73e3ff296cf1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -57,7 +57,7 @@
//
// base = gep a, 0, x, y
// load base
-// laod base + 1 * sizeof(float)
+// load base + 1 * sizeof(float)
// load base + 32 * sizeof(float)
// load base + 33 * sizeof(float)
//
@@ -174,6 +174,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -235,18 +236,16 @@ public:
/// \p UserChainTail Outputs the tail of UserChain so that we can
/// garbage-collect unused instructions in UserChain.
static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
- User *&UserChainTail, const DominatorTree *DT);
+ User *&UserChainTail);
/// Looks for a constant offset from the given GEP index without extracting
/// it. It returns the numeric value of the extracted constant offset (0 if
/// failed). The meaning of the arguments are the same as Extract.
- static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
- const DominatorTree *DT);
+ static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
private:
- ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
- : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
- }
+ ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
+ : IP(InsertionPt), DL(InsertionPt->getDataLayout()) {}
/// Searches the expression that computes V for a non-zero constant C s.t.
/// V can be reassociated into the form V' + C. If the searching is
@@ -333,10 +332,9 @@ private:
SmallVector<CastInst *, 16> ExtInsts;
/// Insertion position of cloned instructions.
- Instruction *IP;
+ BasicBlock::iterator IP;
const DataLayout &DL;
- const DominatorTree *DT;
};
/// A pass that tries to split every GEP in the function into a variadic
@@ -393,6 +391,11 @@ private:
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
+ /// Tries to reorder the given GEP with the GEP that produces the base if
+ /// doing so results in producing a constant offset as the outermost
+ /// index.
+ bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
+
/// Lower a GEP with multiple indices into multiple GEPs with a single index.
/// Function splitGEP already split the original GEP into a variadic part and
/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
@@ -519,12 +522,10 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
}
Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
- // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
- // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
- // FIXME: this does not appear to be covered by any tests
- // (with x86/aarch64 backends at least)
+ // Do not trace into "or" unless it is equivalent to "add".
+ // This is the case if the or's disjoint flag is set.
if (BO->getOpcode() == Instruction::Or &&
- !haveNoCommonBitsSet(LHS, RHS, SimplifyQuery(DL, DT, /*AC*/ nullptr, BO)))
+ !cast<PossiblyDisjointInst>(BO)->isDisjoint())
return false;
// FIXME: We don't currently support constants from the RHS of subs,
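
The hunk above stops querying haveNoCommonBitsSet() and instead trusts the `or` instruction's disjoint flag. The identity it relies on is that (a | b) equals (a + b) exactly when a and b have no set bits in common; a small self-contained check of that fact with plain integers (not the LLVM PossiblyDisjointInst API):

#include <cassert>
#include <cstdint>

// For operands with no common set bits, OR and ADD compute the same value,
// which is why a disjoint `or` may be traced into as if it were an `add`.
bool disjoint(uint64_t A, uint64_t B) { return (A & B) == 0; }

int main() {
  uint64_t Base = 0x100, Off = 0x7; // no overlapping bits
  assert(disjoint(Base, Off) && (Base | Off) == Base + Off);

  uint64_t X = 0x6, Y = 0x3;        // bit 1 overlaps
  assert(!disjoint(X, Y) && (X | Y) != X + Y);
}
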
@@ -669,7 +670,7 @@ Value *ConstantOffsetExtractor::applyExts(Value *V) {
Instruction *Ext = I->clone();
Ext->setOperand(0, Current);
- Ext->insertBefore(IP);
+ Ext->insertBefore(*IP->getParent(), IP);
Current = Ext;
}
return Current;
@@ -778,9 +779,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
}
Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
- User *&UserChainTail,
- const DominatorTree *DT) {
- ConstantOffsetExtractor Extractor(GEP, DT);
+ User *&UserChainTail) {
+ ConstantOffsetExtractor Extractor(GEP->getIterator());
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
@@ -795,10 +795,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
return IdxWithoutConstOffset;
}
-int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
- const DominatorTree *DT) {
+int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
// If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
- return ConstantOffsetExtractor(GEP, DT)
+ return ConstantOffsetExtractor(GEP->getIterator())
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds())
.getSExtValue();
@@ -814,7 +813,8 @@ bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
// Skip struct member indices which must be i32.
if (GTI.isSequential()) {
if ((*I)->getType() != PtrIdxTy) {
- *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP);
+ *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom",
+ GEP->getIterator());
Changed = true;
}
}
@@ -836,7 +836,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
- ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
@@ -970,6 +970,49 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
Variadic->eraseFromParent();
}
+bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
+ TargetTransformInfo &TTI) {
+ auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
+ if (!PtrGEP)
+ return false;
+
+ bool NestedNeedsExtraction;
+ int64_t NestedByteOffset =
+ accumulateByteOffset(PtrGEP, NestedNeedsExtraction);
+ if (!NestedNeedsExtraction)
+ return false;
+
+ unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
+ if (!TTI.isLegalAddressingMode(GEP->getResultElementType(),
+ /*BaseGV=*/nullptr, NestedByteOffset,
+ /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
+ return false;
+
+ bool GEPInBounds = GEP->isInBounds();
+ bool PtrGEPInBounds = PtrGEP->isInBounds();
+ bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
+ if (IsChainInBounds) {
+ auto IsKnownNonNegative = [this](Value *V) {
+ return isKnownNonNegative(V, *DL);
+ };
+ IsChainInBounds &= all_of(GEP->indices(), IsKnownNonNegative);
+ if (IsChainInBounds)
+ IsChainInBounds &= all_of(PtrGEP->indices(), IsKnownNonNegative);
+ }
+
+ IRBuilder<> Builder(GEP);
+ // For trivial GEP chains, we can swap the indices.
+ Value *NewSrc = Builder.CreateGEP(
+ GEP->getSourceElementType(), PtrGEP->getPointerOperand(),
+ SmallVector<Value *, 4>(GEP->indices()), "", IsChainInBounds);
+ Value *NewGEP = Builder.CreateGEP(PtrGEP->getSourceElementType(), NewSrc,
+ SmallVector<Value *, 4>(PtrGEP->indices()),
+ "", IsChainInBounds);
+ GEP->replaceAllUsesWith(NewGEP);
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+ return true;
+}
+
bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Skip vector GEPs.
if (GEP->getType()->isVectorTy())
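
The new reorderGEP() above swaps a two-level GEP chain so the constant offset becomes the outermost step, where isLegalAddressingMode() says the target can fold it. The underlying fact is plain reassociation of the address computation: p + C + x and p + x + C name the same location. A hedged sketch with ordinary pointer arithmetic (not the LLVM GEP API):

#include <cassert>

int main() {
  float Buf[64] = {};
  const int C = 8; // constant offset produced by the inner GEP
  int X = 5;       // variable index added by the outer GEP

  // Original chain: inner GEP adds the constant, outer GEP adds the variable.
  float *Orig = (Buf + C) + X;
  // Reordered chain: variable index first, constant offset outermost.
  float *Reordered = (Buf + X) + C;

  assert(Orig == Reordered); // same address, better addressing-mode folding
}
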
@@ -985,11 +1028,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
bool NeedsExtraction;
int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
- if (!NeedsExtraction)
- return Changed;
-
TargetTransformInfo &TTI = GetTTI(*GEP->getFunction());
+ if (!NeedsExtraction) {
+ Changed |= reorderGEP(GEP, TTI);
+ return Changed;
+ }
+
// If LowerGEP is disabled, before really splitting the GEP, check whether the
// backend supports the addressing mode we are about to produce. If no, this
// splitting probably won't be beneficial.
@@ -1026,7 +1071,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Value *OldIdx = GEP->getOperand(I);
User *UserChainTail;
Value *NewIdx =
- ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
+ ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
if (NewIdx != nullptr) {
// Switches to the index with the constant offset removed.
GEP->setOperand(I, NewIdx);
@@ -1057,8 +1102,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
//
// TODO(jingyue): do some range analysis to keep as many inbounds as
// possible. GEPs with inbounds are more friendly to alias analysis.
+ // TODO(gep_nowrap): Preserve nuw at least.
bool GEPWasInBounds = GEP->isInBounds();
- GEP->setIsInBounds(false);
+ GEP->setNoWrapFlags(GEPNoWrapFlags::none());
// Lowers a GEP to either GEPs with a single index or arithmetic operations.
if (LowerGEP) {
@@ -1133,7 +1179,7 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
if (DisableSeparateConstOffsetFromGEP)
return false;
- DL = &F.getParent()->getDataLayout();
+ DL = &F.getDataLayout();
bool Changed = false;
for (BasicBlock &B : F) {
if (!DT->isReachableFromEntry(&B))
@@ -1188,9 +1234,11 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
if (LHS->getType() == RHS->getType()) {
ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
- Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+ Instruction *NewSExt =
+ new SExtInst(Dom, I->getType(), "", I->getIterator());
NewSExt->takeName(I);
I->replaceAllUsesWith(NewSExt);
+ NewSExt->setDebugLoc(I->getDebugLoc());
RecursivelyDeleteTriviallyDeadInstructions(I);
return true;
}
@@ -1199,9 +1247,11 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
if (LHS->getType() == RHS->getType()) {
if (auto *Dom =
findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
- Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+ Instruction *NewSExt =
+ new SExtInst(Dom, I->getType(), "", I->getIterator());
NewSExt->takeName(I);
I->replaceAllUsesWith(NewSExt);
+ NewSExt->setDebugLoc(I->getDebugLoc());
RecursivelyDeleteTriviallyDeadInstructions(I);
return true;
}
@@ -1321,7 +1371,7 @@ void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
Second->setOperand(1, Offset1);
// We changed p+o+c to p+c+o, p+c may not be inbound anymore.
- const DataLayout &DAL = First->getModule()->getDataLayout();
+ const DataLayout &DAL = First->getDataLayout();
APInt Offset(DAL.getIndexSizeInBits(
cast<PointerType>(First->getType())->getAddressSpace()),
0);
@@ -1330,8 +1380,9 @@ void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
uint64_t ObjectSize;
if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
Offset.ugt(ObjectSize)) {
- First->setIsInBounds(false);
- Second->setIsInBounds(false);
+ // TODO(gep_nowrap): Make flag preservation more precise.
+ First->setNoWrapFlags(GEPNoWrapFlags::none());
+ Second->setNoWrapFlags(GEPNoWrapFlags::none());
} else
First->setIsInBounds(true);
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 7eb0ba1c2c17..c235d2fb2a5b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Use.h"
@@ -133,6 +134,7 @@ static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"not-taken 1/<this option> times or less."),
cl::init(16));
+AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
BranchInst *Term;
@@ -630,7 +632,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
} else {
// Create a new unconditional branch that will continue the loop as a new
// terminator.
- BranchInst::Create(ContinueBB, ParentBB);
+ Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
+ NewBI->setDebugLoc(BI.getDebugLoc());
}
BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
@@ -664,10 +667,12 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// Finish updating dominator tree and memory ssa for full unswitch.
if (FullUnswitch) {
if (MSSAU) {
- // Remove the cloned branch instruction.
- ParentBB->getTerminator()->eraseFromParent();
- // Create unconditional branch now.
- BranchInst::Create(ContinueBB, ParentBB);
+ Instruction *Term = ParentBB->getTerminator();
+ // Remove the cloned branch instruction and create unconditional branch
+ // now.
+ Instruction *NewBI = BranchInst::Create(ContinueBB, ParentBB);
+ NewBI->setDebugLoc(Term->getDebugLoc());
+ Term->eraseFromParent();
MSSAU->removeEdge(ParentBB, LoopExitBB);
}
DT.deleteEdge(ParentBB, LoopExitBB);
@@ -859,8 +864,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
OldPH->getTerminator()->eraseFromParent();
- // Now add the unswitched switch.
+ // Now add the unswitched switch. This new switch instruction inherits the
+ // debug location of the old switch, because it semantically replaces the old
+ // one.
auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+ NewSI->setDebugLoc(SIW->getDebugLoc());
SwitchInstProfUpdateWrapper NewSIW(*NewSI);
// Rewrite the IR for the unswitched basic blocks. This requires two steps.
@@ -970,8 +978,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
/*KeepOneInputPHIs*/ true);
}
// Now nuke the switch and replace it with a direct branch.
+ Instruction *NewBI = BranchInst::Create(CommonSuccBB, BB);
+ NewBI->setDebugLoc(SIW->getDebugLoc());
SIW.eraseFromParent();
- BranchInst::Create(CommonSuccBB, BB);
} else if (DefaultExitBB) {
assert(SI.getNumCases() > 0 &&
"If we had no cases we'd have a common successor!");
@@ -1243,9 +1252,12 @@ static BasicBlock *buildClonedLoopBlocks(
if (SE && isa<PHINode>(I))
SE->forgetValue(&I);
+ BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
+
auto *MergePN =
PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi");
- MergePN->insertBefore(MergeBB->getFirstInsertionPt());
+ MergePN->insertBefore(InsertPt);
+ MergePN->setDebugLoc(InsertPt->getDebugLoc());
I.replaceAllUsesWith(MergePN);
MergePN->addIncoming(&I, ExitBB);
MergePN->addIncoming(&ClonedI, ClonedExitBB);
@@ -1260,8 +1272,8 @@ static BasicBlock *buildClonedLoopBlocks(
Module *M = ClonedPH->getParent()->getParent();
for (auto *ClonedBB : NewBlocks)
for (Instruction &I : *ClonedBB) {
- RemapDPValueRange(M, I.getDbgValueRange(), VMap,
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
RemapInstruction(&I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
if (auto *II = dyn_cast<AssumeInst>(&I))
@@ -1304,8 +1316,9 @@ static BasicBlock *buildClonedLoopBlocks(
else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
ClonedConditionToErase = SI->getCondition();
+ Instruction *BI = BranchInst::Create(ClonedSuccBB, ClonedParentBB);
+ BI->setDebugLoc(ClonedTerminator->getDebugLoc());
ClonedTerminator->eraseFromParent();
- BranchInst::Create(ClonedSuccBB, ClonedParentBB);
if (ClonedConditionToErase)
RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
@@ -2332,23 +2345,27 @@ static void unswitchNontrivialInvariants(
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
if (FullUnswitch) {
- // Splice the terminator from the original loop and rewrite its
- // successors.
- TI.moveBefore(*SplitBB, SplitBB->end());
-
// Keep a clone of the terminator for MSSA updates.
Instruction *NewTI = TI.clone();
NewTI->insertInto(ParentBB, ParentBB->end());
+ // Splice the terminator from the original loop and rewrite its
+ // successors.
+ TI.moveBefore(*SplitBB, SplitBB->end());
+ TI.dropLocation();
+
// First wire up the moved terminator to the preheaders.
if (BI) {
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
Value *Cond = skipTrivialSelect(BI->getCondition());
- if (InsertFreeze)
- Cond = new FreezeInst(
- Cond, Cond->getName() + ".fr", BI);
+ if (InsertFreeze) {
+ // We don't give any debug location to the new freeze, because the
+ // BI (`dyn_cast<BranchInst>(TI)`) is an in-loop instruction hoisted
+ // out of the loop.
+ Cond = new FreezeInst(Cond, Cond->getName() + ".fr", BI->getIterator());
+ }
BI->setCondition(Cond);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
@@ -2365,8 +2382,9 @@ static void unswitchNontrivialInvariants(
Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
if (InsertFreeze)
- SI->setCondition(new FreezeInst(
- SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
+ SI->setCondition(new FreezeInst(SI->getCondition(),
+ SI->getCondition()->getName() + ".fr",
+ SI->getIterator()));
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
@@ -2430,12 +2448,13 @@ static void unswitchNontrivialInvariants(
DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
}
- // After MSSAU update, remove the cloned terminator instruction NewTI.
- ParentBB->getTerminator()->eraseFromParent();
-
// Create a new unconditional branch to the continuing block (as opposed to
// the one cloned).
- BranchInst::Create(RetainedSuccBB, ParentBB);
+ Instruction *NewBI = BranchInst::Create(RetainedSuccBB, ParentBB);
+ NewBI->setDebugLoc(NewTI->getDebugLoc());
+
+ // After MSSAU update, remove the cloned terminator instruction NewTI.
+ NewTI->eraseFromParent();
} else {
assert(BI && "Only branches have partial unswitching.");
assert(UnswitchedSuccBBs.size() == 1 &&
@@ -2704,9 +2723,11 @@ static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
if (MSSAU)
MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
- PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
+ PHINode *Phi =
+ PHINode::Create(SI->getType(), 2, "unswitched.select", SI->getIterator());
Phi->addIncoming(SI->getTrueValue(), ThenBB);
Phi->addIncoming(SI->getFalseValue(), HeadBB);
+ Phi->setDebugLoc(SI->getDebugLoc());
SI->replaceAllUsesWith(Phi);
SI->eraseFromParent();
@@ -3092,7 +3113,7 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
// unswitching will break. Better optimize it away later.
auto *InjectedCond =
ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
- Preheader->getTerminator());
+ Preheader->getTerminator()->getIterator());
BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
BB->getParent(), InLoopSucc);
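
The SimpleLoopUnswitch hunks above are about giving the replacement branches, switches, freezes and PHIs sensible debug locations, or deliberately dropping them. As background on the transform those terminators implement, here is a source-level sketch of unswitching a loop-invariant condition; it illustrates the idea only and is not code from the pass:

#include <cassert>

// Before: the loop re-tests a condition that never changes inside it.
void before(int *A, int N, bool Flag) {
  for (int I = 0; I < N; ++I) {
    if (Flag)          // loop-invariant test, evaluated every iteration
      A[I] += 1;
    else
      A[I] -= 1;
  }
}

// After unswitching: the invariant test is hoisted out of the loop and each
// arm gets its own specialized copy of the loop body.
void after(int *A, int N, bool Flag) {
  if (Flag) {
    for (int I = 0; I < N; ++I)
      A[I] += 1;
  } else {
    for (int I = 0; I < N; ++I)
      A[I] -= 1;
  }
}

int main() {
  int X[4] = {1, 2, 3, 4}, Y[4] = {1, 2, 3, 4};
  before(X, 4, true);
  after(Y, 4, true);
  for (int I = 0; I < 4; ++I)
    assert(X[I] == Y[I]); // both versions compute the same result
}
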
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7017f6adf3a2..11de37f7a7c1 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -77,6 +77,9 @@ static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
+static cl::opt<bool> UserSpeculateUnpredictables(
+ "speculate-unpredictables", cl::Hidden, cl::init(false),
+ cl::desc("Speculate unpredictable branches (default = false)"));
STATISTIC(NumSimpl, "Number of blocks simplified");
@@ -142,8 +145,10 @@ performBlockTailMerging(Function &F, ArrayRef<BasicBlock *> BBs,
// And turn BB into a block that just unconditionally branches
// to the canonical block.
+ Instruction *BI = BranchInst::Create(CanonicalBB, BB);
+ BI->setDebugLoc(Term->getDebugLoc());
Term->eraseFromParent();
- BranchInst::Create(CanonicalBB, BB);
+
if (Updates)
Updates->push_back({DominatorTree::Insert, BB, CanonicalBB});
}
@@ -323,6 +328,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.HoistCommonInsts = UserHoistCommonInsts;
if (UserSinkCommonInsts.getNumOccurrences())
Options.SinkCommonInsts = UserSinkCommonInsts;
+ if (UserSpeculateUnpredictables.getNumOccurrences())
+ Options.SpeculateUnpredictables = UserSpeculateUnpredictables;
}
SimplifyCFGPass::SimplifyCFGPass() {
@@ -349,7 +356,9 @@ void SimplifyCFGPass::printPipeline(
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
- OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
+ OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
+ OS << (Options.SpeculateUnpredictables ? "" : "no-")
+ << "speculate-unpredictables";
OS << '>';
}
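
The new cl::opt above adds a speculate-unpredictables knob, and printPipeline now emits it as a simplifycfg pass parameter. Assuming the matching PassBuilder parsing landed with the same spelling (it is not shown in this hunk), an invocation would look roughly like:

opt -passes='simplifycfg<speculate-unpredictables>' -S input.ll
opt -passes=simplifycfg -speculate-unpredictables -S input.ll
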
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 7a5318d4404c..ed9c1828ce06 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -260,36 +260,47 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I,
}
}
+// Do not hoist any debug info intrinsics.
+// ...
+// if (cond) {
+// x = y * z;
+// foo();
+// }
+// ...
+// -------- Which then becomes:
+// ...
+// if.then:
+// %x = mul i32 %y, %z
+// call void @llvm.dbg.value(%x, !"x", !DIExpression())
+// call void foo()
+//
+// SpeculativeExecution might decide to hoist the 'y * z' calculation
+// out of the 'if' block, because it is more efficient that way, so the
+// '%x = mul i32 %y, %z' moves to the block above. But it might also
+// decide to hoist the 'llvm.dbg.value' call.
+// This is incorrect, because even if we've moved the calculation of
+// 'y * z', we should not see the value of 'x' change unless we
+// actually go inside the 'if' block.
+
bool SpeculativeExecutionPass::considerHoistingFromTo(
BasicBlock &FromBlock, BasicBlock &ToBlock) {
SmallPtrSet<const Instruction *, 8> NotHoisted;
- const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](const User *U) {
- // Debug variable has special operand to check it's not hoisted.
- if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(U)) {
- return all_of(DVI->location_ops(), [&NotHoisted](Value *V) {
- if (const auto *I = dyn_cast_or_null<Instruction>(V)) {
- if (!NotHoisted.contains(I))
- return true;
- }
- return false;
- });
- }
-
- // Usially debug label intrinsic corresponds to label in LLVM IR. In these
- // cases we should not move it here.
- // TODO: Possible special processing needed to detect it is related to a
- // hoisted instruction.
- if (isa<DbgLabelInst>(U))
- return false;
-
- for (const Value *V : U->operand_values()) {
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ auto HasNoUnhoistedInstr = [&NotHoisted](auto Values) {
+ for (const Value *V : Values) {
+ if (const auto *I = dyn_cast_or_null<Instruction>(V))
if (NotHoisted.contains(I))
return false;
- }
}
return true;
};
+ auto AllPrecedingUsesFromBlockHoisted =
+ [&HasNoUnhoistedInstr](const User *U) {
+ // Do not hoist any debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(U))
+ return false;
+
+ return HasNoUnhoistedInstr(U->operand_values());
+ };
InstructionCost TotalSpeculationCost = 0;
unsigned NotHoistedInstCount = 0;
@@ -316,7 +327,8 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
auto Current = I;
++I;
if (!NotHoisted.count(&*Current)) {
- Current->moveBeforePreserving(ToBlock.getTerminator());
+ Current->moveBefore(ToBlock.getTerminator());
+ Current->dropLocation();
}
}
return true;
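
The pass touched above hoists cheap, side-effect-free computations out of a conditional block so they can execute speculatively; the new comment explains why debug-info intrinsics must not travel with them. A source-level sketch of the transform (an illustration, not the pass's output):

#include <cassert>

// Before: the multiply is only evaluated when Cond holds.
int beforeHoist(bool Cond, int Y, int Z) {
  int R = 0;
  if (Cond)
    R = Y * Z; // cheap and side-effect free, so it may be speculated
  return R;
}

// After speculation: the multiply runs unconditionally above the branch;
// only the *use* of the value stays conditional. A dbg.value-style update
// saying "the source variable now equals Y*Z" must not be hoisted with it,
// or a debugger would show the new value even when the branch is not taken.
int afterHoist(bool Cond, int Y, int Z) {
  int Tmp = Y * Z; // speculated computation
  int R = 0;
  if (Cond)
    R = Tmp;
  return R;
}

int main() {
  assert(beforeHoist(true, 6, 7) == afterHoist(true, 6, 7));
  assert(beforeHoist(false, 6, 7) == afterHoist(false, 6, 7));
}
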
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 75910d7b698a..75585fcc8026 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -425,14 +425,12 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
// Returns true if A matches B + C where C is constant.
static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) {
- return (match(A, m_Add(m_Value(B), m_ConstantInt(C))) ||
- match(A, m_Add(m_ConstantInt(C), m_Value(B))));
+ return match(A, m_c_Add(m_Value(B), m_ConstantInt(C)));
}
// Returns true if A matches B | C where C is constant.
static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) {
- return (match(A, m_Or(m_Value(B), m_ConstantInt(C))) ||
- match(A, m_Or(m_ConstantInt(C), m_Value(B))));
+ return match(A, m_c_Or(m_Value(B), m_ConstantInt(C)));
}
void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(
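
m_c_Add and m_c_Or are PatternMatch's commutative matchers, so a single call now covers both operand orders that the removed code enumerated by hand. A self-contained sketch of the same idea over a toy expression node (hypothetical types, not the LLVM PatternMatch API):

#include <cassert>
#include <optional>
#include <utility>

// Toy binary-op node standing in for an LLVM `add` instruction.
struct BinOp {
  char Opcode; // '+' here
  int LHS; bool LHSIsConst;
  int RHS; bool RHSIsConst;
};

// Commutative "B + C" matcher: succeeds whichever side the constant is on,
// returning {variable operand, constant operand}.
std::optional<std::pair<int, int>> matchCommutativeAdd(const BinOp &Op) {
  if (Op.Opcode != '+')
    return std::nullopt;
  if (Op.RHSIsConst) return std::make_pair(Op.LHS, Op.RHS); // B + C
  if (Op.LHSIsConst) return std::make_pair(Op.RHS, Op.LHS); // C + B
  return std::nullopt;
}

int main() {
  BinOp A{'+', /*LHS*/7, false, /*RHS*/3, true};  // x + 3
  BinOp B{'+', /*LHS*/3, true,  /*RHS*/7, false}; // 3 + x
  assert(matchCommutativeAdd(A) && matchCommutativeAdd(B));
  assert(matchCommutativeAdd(A)->second == 3 &&
         matchCommutativeAdd(B)->second == 3);
}
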
@@ -715,7 +713,7 @@ namespace llvm {
PreservedAnalyses
StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
- const DataLayout *DL = &F.getParent()->getDataLayout();
+ const DataLayout *DL = &F.getDataLayout();
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 7d96a3478858..9c711ec18382 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -772,7 +772,7 @@ void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
Changed = false;
- SimplifyQuery Q(Func->getParent()->getDataLayout());
+ SimplifyQuery Q(Func->getDataLayout());
Q.DT = DT;
// Setting CanUseUndef to true might extend value liveness, set it to false
// to achieve better register pressure.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index c6e8505d5ab4..1b3e6d9549b8 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -349,7 +349,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
// does not write to memory and the load provably won't trap.
// Writes to memory only matter if they may alias the pointer
// being loaded from.
- const DataLayout &DL = L->getModule()->getDataLayout();
+ const DataLayout &DL = L->getDataLayout();
if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
L->getAlign(), DL, L))
@@ -509,8 +509,10 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
BasicBlock *NewEntry = BasicBlock::Create(F.getContext(), "", &F, HeaderBB);
NewEntry->takeName(HeaderBB);
HeaderBB->setName("tailrecurse");
- BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry);
- BI->setDebugLoc(CI->getDebugLoc());
+ BranchInst::Create(HeaderBB, NewEntry);
+ // If the new branch preserved the debug location of CI, it could result in
+ // misleading stepping if CI is located in a conditional branch. So we
+ // don't give the new branch any debug location here.
// Move all fixed sized allocas from HeaderBB to NewEntry.
for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
@@ -592,7 +594,7 @@ void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
int OpndIdx) {
Type *AggTy = CI->getParamByValType(OpndIdx);
assert(AggTy);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
// Get alignment of byVal operand.
Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());
@@ -601,7 +603,7 @@ void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
// Put alloca into the entry block.
Value *NewAlloca = new AllocaInst(
AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
- CI->getArgOperand(OpndIdx)->getName(), &*F.getEntryBlock().begin());
+ CI->getArgOperand(OpndIdx)->getName(), F.getEntryBlock().begin());
IRBuilder<> Builder(CI);
Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));
@@ -619,7 +621,7 @@ void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
CallInst *CI, int OpndIdx) {
Type *AggTy = CI->getParamByValType(OpndIdx);
assert(AggTy);
- const DataLayout &DL = F.getParent()->getDataLayout();
+ const DataLayout &DL = F.getDataLayout();
// Get alignment of byVal operand.
Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());
@@ -714,8 +716,9 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
// We found a return value we want to use, insert a select instruction to
// select it if we don't already know what our return value will be and
// store the result in our return value PHI node.
- SelectInst *SI = SelectInst::Create(
- RetKnownPN, RetPN, Ret->getReturnValue(), "current.ret.tr", Ret);
+ SelectInst *SI =
+ SelectInst::Create(RetKnownPN, RetPN, Ret->getReturnValue(),
+ "current.ret.tr", Ret->getIterator());
RetSelects.push_back(SI);
RetPN->addIncoming(SI, BB);
@@ -728,7 +731,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
// Now that all of the PHI nodes are in place, remove the call and
// ret instructions, replacing them with an unconditional branch.
- BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret);
+ BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret->getIterator());
NewBI->setDebugLoc(CI->getDebugLoc());
Ret->eraseFromParent(); // Remove return.
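
The TailRecursionElimination hunks above adjust where the new branches and selects are inserted and which debug locations they keep. As background on the transform itself, a source-level sketch of eliminating an accumulator-style tail recursion (an illustration, not the pass's output):

#include <cassert>

// Before: "almost" tail-recursive; the pending multiply is what the pass's
// accumulator-recursion handling (the AccPN logic above) takes care of.
long factRec(long N) {
  if (N <= 1)
    return 1;
  return N * factRec(N - 1);
}

// After tail-recursion elimination: the recursive call becomes a back edge
// and the pending multiply is carried in an accumulator variable.
long factLoop(long N) {
  long Acc = 1;
  for (; N > 1; --N)
    Acc *= N;
  return Acc;
}

int main() {
  assert(factRec(5) == 120 && factLoop(5) == 120);
}
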
@@ -746,7 +749,7 @@ void TailRecursionEliminator::cleanupAndFinalize() {
// call.
for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = simplifyInstruction(PN, F.getParent()->getDataLayout())) {
+ if (Value *PNV = simplifyInstruction(PN, F.getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -776,6 +779,7 @@ void TailRecursionEliminator::cleanupAndFinalize() {
AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
RI->getOperand(0));
AccRecInstrNew->insertBefore(RI);
+ AccRecInstrNew->dropLocation();
RI->setOperand(0, AccRecInstrNew);
}
}
@@ -787,8 +791,9 @@ void TailRecursionEliminator::cleanupAndFinalize() {
if (!RI)
continue;
- SelectInst *SI = SelectInst::Create(
- RetKnownPN, RetPN, RI->getOperand(0), "current.ret.tr", RI);
+ SelectInst *SI =
+ SelectInst::Create(RetKnownPN, RetPN, RI->getOperand(0),
+ "current.ret.tr", RI->getIterator());
RetSelects.push_back(SI);
RI->setOperand(0, SI);
}
@@ -803,6 +808,7 @@ void TailRecursionEliminator::cleanupAndFinalize() {
AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
SI->getFalseValue());
AccRecInstrNew->insertBefore(SI);
+ AccRecInstrNew->dropLocation();
SI->setFalseValue(AccRecInstrNew);
}
}